shhh <- suppressPackageStartupMessages # It's a library, so shhh!

shhh(library( mgcv ))
shhh(library(dplyr))
shhh(library(ggplot2))
shhh(library(lme4))
shhh(library(tidymv))
shhh(library(gamlss))
shhh(library(gsubfn))
shhh(library(lmerTest))
shhh(library(tidyverse))
shhh(library(boot))
shhh(library(rsample))
shhh(library(plotrix))
shhh(library(ggrepel))
shhh(library(mgcv))

shhh(library(brms))
shhh(library(bayesplot))
shhh(library(patchwork))
shhh(library(MASS))
shhh(library(tidyr))
shhh(library(extraDistr))
shhh(library(purrr))
# For exercises with Stan code
shhh(library(rstan))
options(mc.cores = parallel::detectCores())
rstan_options(auto_write = FALSE)

shhh(library(car))
shhh(library(coda))
shhh(library(gridExtra))

theme_set(theme_bw())
options(digits=4)
options(scipen=999)
set.seed(444)
pipe_message = function(.data, status) {message(status); .data}

Read in MoTR Data


rate = 160

file_prefix = "../data/provo_f160/"
fnames = list.files(path=file_prefix)

df = data.frame()
for (f in fnames) {
  temp = read.csv(paste0(file_prefix, "/", f)) %>%
    mutate(subj = str_remove(f, "_reading_measures.csv"))
  df = rbind(df, temp)
}

# Filter out readers whose accuracy to the comprehension questions were less than 80%.
filter_df = df %>%
  group_by(para_nr, subj) %>% summarise(correct = if_else(unique(correctness) == 1, 1, 0)) %>% ungroup() %>%
  drop_na() %>%
  group_by(subj) %>% summarise(p_correct = mean(correct)) %>% ungroup() %>%
  mutate(p_correct = round(p_correct, digits = 2))
`summarise()` has grouped output by 'para_nr'. You can override using the `.groups` argument.
filter_df = filter_df %>% filter(p_correct < 0.8)
filter_list = filter_df$subj
pilot_exceptions <- c("reader_255", "reader_256", "reader_259", "reader_261", "reader_262", "reader_263")

raw_df = df %>%
  filter(! subj %in% c(filter_list) | (subj %in% pilot_exceptions)) %>%
  mutate(word = str_trim(word)) %>%
  mutate(subj = str_remove(subj, "reader_")) %>%
  mutate(subj = as.character(subj)) %>%
  mutate(FPReg = if_else(total_duration == 0, -1, FPReg)) %>% #If the word is skipped we can't say that it wasn't regressed on the first pass. Set to a "NA"
  dplyr::select(expr_id, cond_id, para_nr, word, word_nr, first_duration, total_duration, gaze_duration, go_past_time, FPReg, subj)
length(unique(raw_df$subj))
[1] 98
df %>%
  filter(! subj %in% c(filter_list) | (subj %in% pilot_exceptions)) %>%
  filter(FPReg >= 0) %>%
  dplyr::select(FPReg) %>%
  drop_na() %>%
  summarise( m = mean(FPReg))

df %>%
  filter(! subj %in% c(filter_list) | (subj %in% pilot_exceptions)) %>%
  dplyr::select(FPFix) %>%
  drop_na() %>%
  summarise( m = mean(FPFix))
NA
NA
# View(raw_df)
# Average across subjects
motr_agg_df = raw_df %>%
  gather(metric, value, 6:10) %>%
    filter(value >= 0) %>% #Removes the "NA" values for FPReg
  
    # ==== Remove skipped words
    # mutate(zero = if_else(metric != "FPReg" & value == 0,T, F)) %>%
    # filter(zero == F) %>%
  
    drop_na() %>%
    group_by(para_nr, word_nr, word, metric) %>%
    
    # === Remove outliers > 3SD
      # mutate(outlier = if_else(metric != "FPReg" & value > (mean(value) + 3 * sd(value)), T, F)) %>% filter(outlier == F) %>%
  
      summarise(value = mean(value), nsubj = length(unique(subj))) %>%
  ungroup() %>%
  arrange(para_nr, word_nr) %>%
  rename(text_id = para_nr, word_text_idx = word_nr, motr_value = value)
`summarise()` has grouped output by 'para_nr', 'word_nr', 'word'. You can override using the `.groups` argument.
# View(motr_agg_df)

Comparison to Provo

# Read in Provo surprisal, frequency and length data
provo_modeling_df = read.csv("../data/provo_stats.csv") %>%
  dplyr::select(text_id, sent_id, trigger_idx, word, freq, surp, len) %>%
  rename(word_idx = trigger_idx)

provo_modeling_df
NA
# Read in Provo eyetracking data

provo_raw_df = read.csv("../data/provo_eyetracking.csv")

# unique(provo_raw_df$Participant_ID)
# length(unique(provo_raw_df$Participant_ID))

provo_eyetracking_df = provo_raw_df %>%
  dplyr::select(Participant_ID, Text_ID, Sentence_Number, Word_In_Sentence_Number, Word, Word_Number, IA_FIRST_FIX_PROGRESSIVE, IA_FIRST_RUN_DWELL_TIME, IA_DWELL_TIME, IA_REGRESSION_PATH_DURATION, IA_REGRESSION_OUT, IA_SKIP) %>%
  rename( #first_duration = IA_FIRST_FIXATION_DURATION,   
          gaze_duration = IA_FIRST_RUN_DWELL_TIME,
          total_duration = IA_DWELL_TIME,
          go_past_time = IA_REGRESSION_PATH_DURATION,
          subj = Participant_ID,
          text_id = Text_ID,
          sent_id = Sentence_Number,
          word_idx = Word_In_Sentence_Number,
          word_text_idx = Word_Number,   # IA_ID?
          word = Word,      # Word?
          FPReg = IA_REGRESSION_OUT,
          skip = IA_SKIP,
          ff_progressive = IA_FIRST_FIX_PROGRESSIVE) %>%
  mutate(first_duration = gaze_duration) %>%
  mutate(gaze_duration = if_else(ff_progressive == 0, 0, as.double(gaze_duration)),
         go_past_time = if_else(ff_progressive == 0, 0, as.double(go_past_time))) %>%
  dplyr::select(-ff_progressive) %>%
  
  mutate(
    gaze_duration = if_else(total_duration == 0, 0, as.double(gaze_duration)),
      go_past_time = if_else(total_duration == 0, 0, as.double(go_past_time)),
      FPReg = if_else(total_duration == 0, -1, as.double(FPReg)),
      first_duration =  if_else(total_duration == 0, 0, as.double(first_duration)),
  ) %>%
  gather(metric, value, 7:12) %>%
  filter(value >= 0) %>%          # filter skipped word in eye tracking data for FPReg
  
  # ==== Remove skipped words
  # mutate(zero = if_else(metric != "FPReg" & value == 0,T, F)) %>%
  # filter(zero == F) %>%
  
  # mutate(value = if_else(is.na(value), as.integer(0), as.integer(value))) %>%
  # mutate(value = if_else(metric != "FPReg" & is.na(value), as.integer(0), as.integer(value))) %>%
  drop_na() %>%
  mutate(word = str_trim(word)) %>%
  mutate(subj = str_remove(subj, "Sub")) %>%
  mutate(subj = as.integer(subj)) %>%
    group_by(text_id, word_text_idx, sent_id, word_idx, word, metric) %>%
  
  # === Remove outliers > 3SD
    # mutate(outlier = if_else(! metric %in% c("FPReg", "skip") & value > (mean(value) + 3 * sd(value) ), T, F)) %>%
    # filter(outlier == F) %>%
  
  ungroup() #%>%

# Aggregate cross-participant data for all subjects
provo_eyetracking_agg_df = provo_eyetracking_df %>%
  group_by(text_id, word_text_idx, sent_id, word_idx, word, metric) %>%
    summarise(value = mean(value),
              nsubj = length(unique(subj))) %>%
    ungroup()
`summarise()` has grouped output by 'text_id', 'word_text_idx', 'sent_id', 'word_idx', 'word'. You can override using the `.groups` argument.
provo_raw_df %>%
  dplyr::select(IA_REGRESSION_OUT) %>%
  drop_na() %>%
  summarise( m = mean(IA_REGRESSION_OUT))

provo_raw_df %>%
  dplyr::select(IA_SKIP) %>%
  drop_na() %>%
  summarise( m = mean(IA_SKIP))
NA

# Split the eyetracking data in two by subjects to see how well it correlates with itself
provo_eyetracking_subj1_df_temp = provo_eyetracking_df %>%
  filter(subj <= 42) %>%
  mutate(word_text_idx = as.integer(word_text_idx - 1)) %>%
  group_by(text_id, word_text_idx, sent_id, word_idx, word, metric) %>%
    summarise(value = mean(value)) %>%
  ungroup() %>%
  rename(value_1 = value) #%>%
`summarise()` has grouped output by 'text_id', 'word_text_idx', 'sent_id', 'word_idx', 'word'. You can override using the `.groups` argument.
  # dplyr::select(-sent_id, -word_idx)


provo_eyetracking_subj1_df = merge(provo_eyetracking_subj1_df_temp, motr_agg_df, by=c("text_id", "word_text_idx", "metric")) %>%
  arrange(text_id, sent_id, word_idx) %>%
  filter(!(text_id == 13 & word_text_idx >= 20 & word_text_idx <= 52)) %>%
  filter(!(text_id == 3 & word_text_idx >= 46 & word_text_idx <= 57)) %>%
  rename(word = word.y) %>%
  dplyr::select(text_id, word_text_idx, metric, word, value_1) 

# View(provo_eyetracking_subj1_df)

provo_eyetracking_subj2_df = provo_eyetracking_df %>%
  filter(subj > 42) %>%
  mutate(word_text_idx = as.integer(word_text_idx - 1)) %>%
  group_by(text_id, word_text_idx, sent_id, word_idx, word, metric) %>%
    summarise(value = mean(value)) %>%
  ungroup() %>%
    rename(value_2 = value)%>%
  dplyr::select(-sent_id, -word_idx)
`summarise()` has grouped output by 'text_id', 'word_text_idx', 'sent_id', 'word_idx', 'word'. You can override using the `.groups` argument.
# View(provo_eyetracking_subj2_df)
  
provo_eyetr_grouped_df = merge(provo_eyetracking_subj2_df, provo_eyetracking_subj1_df, by=c("text_id", "word_text_idx", "metric")) %>%
  # filter(word.x == word.y) %>%
  dplyr::select(-word.y) %>%
  # === Remove outliers > 3SD
  # group_by(metric) %>%
  #   mutate(motr_outlier = if_else(! metric %in% c("FPReg", "skip") & value_1 > (mean(value_1) + 3 * sd(value_1) ), T, F)) %>%
  #   filter(motr_outlier == F) %>%
  #   mutate(eyetr_outlier = if_else(! metric %in% c("FPReg", "skip") & value_2 > (mean(value_2) + 3 * sd(value_2) ), T, F)) %>%
  #   filter(eyetr_outlier == F) %>%
  # ungroup() %>%
  
  gather(measure, value, c("value_1", "value_2")) #%>%
  # dplyr::select(-motr_outlier, -eyetr_outlier)

# View(provo_eyetr_grouped_df)
provo_df = merge(provo_eyetracking_agg_df, provo_modeling_df, by=c("text_id", "sent_id", "word_idx")) %>%
  mutate(word_text_idx = as.integer(word_text_idx - 1)) %>%
  arrange(text_id, sent_id, word_idx) %>%
  rename(eyetr_value = value) 

provo_df = merge(provo_df, motr_agg_df, by=c("text_id", "word_text_idx", "metric")) %>%
arrange(text_id, sent_id, word_idx) %>%
  # almost all the word.x != word.y is because of normalization problem, so we can keep them, instead, deleting some special cases
filter(!(text_id == 13 & word_text_idx >= 20 & word_text_idx <= 52)) %>%
  filter(!(text_id == 3 & word_text_idx >= 46 & word_text_idx <= 57)) %>%
# filter(word.x == word) #%>%
dplyr::select(-word.x, -word.y) %>%
  
# === Remove outliers > 3SD
# group_by(metric) %>%
#   mutate(motr_outlier = if_else(! metric %in% c("FPReg", "skip") & motr_value > (mean(motr_value) + 3 * sd(motr_value) ), T, F)) %>%
#   filter(motr_outlier == F) %>%
#   mutate(eyetr_outlier = if_else(! metric %in% c("FPReg", "skip") & eyetr_value > (mean(eyetr_value) + 3 * sd(eyetr_value) ), T, F)) %>%
#   filter(eyetr_outlier == F) %>%
# ungroup() %>%
  
gather(measure, value, c("eyetr_value", "motr_value")) #%>%
# dplyr::select(-motr_outlier, -eyetr_outlier)
  
# provo_df

Bayesian – use Stan – motr & eyetr correlation

print("Gaze Duration")
[1] "Gaze Duration"
gd_df = provo_df %>% filter(metric == "gaze_duration") %>% 
  spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value =  pmax(eyetr_value, 1),
         motr_value = pmax(motr_value, 1)
  ) %>%
  mutate(eyetr_value_log = log(eyetr_value),
         motr_value_log = log(motr_value))
print(cor.test(gd_df$eyetr_value, gd_df$motr_value)$estimate)
   cor 
0.8174 
print(cor.test(gd_df$eyetr_value_log, gd_df$motr_value_log)$estimate)
   cor 
0.7784 
# View(gd_df)
gd_df %>% 
  gather(measure, value, 12:15) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

NA
# center data around 0.

gd_temp <- gd_df[c("eyetr_value", "motr_value")] %>%
   # mutate(eyetr_value = eyetr_value - mean(eyetr_value),
   #      motr_value = motr_value - mean(motr_value)) %>%
  data.matrix()

gd_temp_log <- gd_df[c("eyetr_value_log", "motr_value_log")] %>%
 mutate(eyetr_value_log = eyetr_value_log - mean(eyetr_value_log), 
        motr_value_log = motr_value_log - mean(motr_value_log)) %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix gd_temp
plot(gd_temp, pch = 16, col = "blue",
     main = "Not Log-Transformed")
# Plot the second data matrix gd_temp_log
plot(gd_temp_log, pch = 16, col = "red",
     main = "Centered Log-Transformed")

gd_data = list(x=gd_temp, N=nrow(gd_temp))

fit_gd = stan(
  file="stan_models/bivariate_correlation.stan", 
  data=gd_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
fit_gd@stanmodel@dso <- new("cxxdso")
saveRDS(fit_gd, file = paste0("./bayesian_models/bayesian_models_correlation/motr_eyetr_gaze_duration_cor_drop0s.rds"))
print("Go Past Time")
[1] "Go Past Time"
gpt_df = provo_df %>% filter(metric == "go_past_time") %>% 
  spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value =  pmax(eyetr_value, 1),
         motr_value = pmax(motr_value, 1)
  ) %>%
  mutate(eyetr_value_log = log(eyetr_value),
         motr_value_log = log(motr_value))
print(cor.test(gpt_df$eyetr_value, gpt_df$motr_value)$estimate)
   cor 
0.7262 
print(cor.test(gpt_df$eyetr_value_log, gpt_df$motr_value_log)$estimate)
  cor 
0.724 
gpt_df %>% 
  gather(measure, value, 12:15) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

gpt_temp <- gpt_df[c("eyetr_value", "motr_value")] %>% data.matrix()

gpt_temp_log <- gpt_df[c("eyetr_value_log", "motr_value_log")] %>%
 mutate(eyetr_value_log = eyetr_value_log - mean(eyetr_value_log), 
        motr_value_log = motr_value_log - mean(motr_value_log)) %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))

# Plot the first data matrix gpt_temp
plot(gpt_temp, pch = 16, col = "blue",
     main = "Not Log-Transformed")
# Plot the second data matrix gpt_temp_log
plot(gpt_temp_log, pch = 16, col = "red",
     main = "Centered Log-Transformed")

# -------fit model go past time ----------
gpt_data = list(x=gpt_temp, N=nrow(gpt_temp))
fit_gpt = stan(
  file="stan_models/bivariate_correlation.stan", 
  data=gpt_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
fit_gpt@stanmodel@dso <- new("cxxdso")
saveRDS(fit_gpt, file = paste0("./bayesian_models/bayesian_models_correlation/motr_eyetr_go_past_time_cor_drop0s.rds"))
print("Total Duration")
[1] "Total Duration"
td_df = provo_df %>% filter(metric == "total_duration") %>% 
  spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value =  pmax(eyetr_value, 1),
         motr_value = pmax(motr_value, 1)
  ) %>%
  mutate(eyetr_value_log = log(eyetr_value),
         motr_value_log = log(motr_value))
print(cor.test(td_df$eyetr_value, td_df$motr_value)$estimate)
   cor 
0.7982 
print(cor.test(td_df$eyetr_value_log, td_df$motr_value_log)$estimate)
   cor 
0.7752 
td_df %>% 
  gather(measure, value, 12:15) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

td_temp <- td_df[c("eyetr_value", "motr_value")] %>% data.matrix()

td_temp_log <- td_df[c("eyetr_value_log", "motr_value_log")] %>%
 mutate(eyetr_value_log = eyetr_value_log - mean(eyetr_value_log), 
        motr_value_log = motr_value_log - mean(motr_value_log)) %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))

# Plot the first data matrix td_temp
plot(td_temp, pch = 16, col = "blue",
     main = "Not Log-Transformed")
# Plot the second data matrix td_temp_log
plot(td_temp_log, pch = 16, col = "red",
     main = "Centered Log-Transformed")

# -------fit model total duration ----------
td_data = list(x=td_temp, N=nrow(td_temp))
fit_td = stan(
  file="stan_models/bivariate_correlation.stan", 
  data=td_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
fit_td@stanmodel@dso <- new("cxxdso")
saveRDS(fit_td, file = paste0("./bayesian_models/bayesian_models_correlation/motr_eyetr_total_duration_cor_drop0s.rds"))
print("First Pass Regression Prob.")
[1] "First Pass Regression Prob."
reg_df = provo_df %>% filter(metric == "FPReg") %>% 
  spread(measure, value) %>%
  filter(eyetr_value > 0, motr_value > 0) %>%
  mutate(eyetr_value =  pmax(eyetr_value, 1e-5),
         motr_value = pmax(motr_value, 1e-5))
print(cor.test(reg_df$eyetr_value, reg_df$motr_value)$estimate)
   cor 
0.3929 
# View(reg_df)
reg_df %>% 
  gather(measure, value, 12:13) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

reg_temp <- reg_df[c("eyetr_value", "motr_value")] %>% data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))

# Plot the first data matrix td_temp
plot(reg_temp, pch = 16, col = "blue",
     main = "Not Log-Transformed")

# -------fit model FPReg ----------
reg_data = list(x=reg_temp, N=nrow(reg_temp))
fit_reg = stan(
  file="stan_models/bivariate_normal_reg.stan", 
  data=reg_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
fit_reg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_reg, file = paste0("./bayesian_models/bayesian_models_correlation/motr_eyetr_FPReg_cor_drop0s.rds"))

rank transformation for FPReg

reg_df = provo_df %>% filter(metric == "FPReg") %>% 
  spread(measure, value) %>%
  mutate(
    eyetr_value_rank = ifelse(eyetr_value > 0, rank(eyetr_value), NA),
    motr_value_rank = ifelse(motr_value > 0, rank(motr_value), NA)
  ) %>%
  drop_na()
#   mutate(
#   eyetr_value_rank = rank(eyetr_value, ties.method = "average"),
#   motr_value_rank = rank(motr_value, ties.method = "average")
# )
# View(reg_df)

print(cor.test(reg_df$eyetr_value_rank, reg_df$motr_value_rank)$estimate)
print(cor.test(reg_df$eyetr_value_rank, reg_df$motr_value_rank, method = "kendall"))


reg_df %>% 
  gather(measure, value, 14:15) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

reg_temp <- reg_df[c("eyetr_value_rank", "motr_value_rank")] %>% data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix td_temp
plot(reg_temp, pch = 16, col = "blue",
     main = "Rank Transformed")
# -------fit model FPReg RANK----------
reg_data = list(x=reg_temp, N=nrow(reg_temp))
fit_reg = stan(
  file="stan_models/bivariate_correlation_rank.stan", 
  data=reg_data, 
  iter=4000, 
  chains=4, 
  cores=4,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
fit_reg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_reg, file = paste0("./bayesian_models/bayesian_models_correlation/motr_eyetr_FPReg_cor_rank_drop0s.rds"))
fit_gd = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_gaze_duration_cor_drop0s.rds")
fit_gpt = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_go_past_time_cor_drop0s.rds")
fit_td = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_total_duration_cor_drop0s.rds")
fit_reg = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_FPReg_cor_drop0s.rds")
fit_reg_rank = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_FPReg_cor_rank_drop0s.rds")

# models for drop 0s
# fit_gd = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_gaze_duration_cor_drop0s.rds")
# fit_gpt = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_go_past_time_cor_drop0s.rds")
# fit_td = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_total_duration_cor_drop0s.rds")
# fit_reg = readRDS("./bayesian_models/bayesian_models_correlation/ranked_motr_eyetr_FPReg_cor.rds")

print('---------------------------- Gaze Duration--------------------------------------------')
[1] "---------------------------- Gaze Duration--------------------------------------------"
print(fit_gd)
Inference for Stan model: anon_model.
4 chains, each with iter=4000; warmup=2000; thin=1; 
post-warmup draws per chain=2000, total post-warmup draws=8000.

                  mean se_mean     sd      2.5%       25%       50%       75%     97.5% n_eff Rhat
mu[1]           234.30    0.01   0.90    232.53    233.69    234.29    234.91    236.06  6945    1
mu[2]           371.77    0.02   1.74    368.30    370.63    371.77    372.91    375.24  6640    1
sigma[1]         37.80    0.01   0.81     36.24     37.26     37.79     38.34     39.42  5283    1
sigma[2]         71.83    0.02   1.46     69.03     70.82     71.83     72.82     74.71  5227    1
nu                4.46    0.00   0.30      3.92      4.25      4.45      4.65      5.08  5469    1
rho               0.51    0.00   0.02      0.48      0.50      0.51      0.52      0.54  6947    1
cov[1,1]       1429.34    0.84  61.16   1313.63   1388.16   1427.81   1469.75   1553.95  5276    1
cov[1,2]       1391.14    1.15  78.47   1239.24   1339.15   1390.35   1441.79   1551.58  4667    1
cov[2,1]       1391.14    1.15  78.47   1239.24   1339.15   1390.35   1441.79   1551.58  4667    1
cov[2,2]       5161.37    2.91 210.40   4765.32   5015.23   5159.80   5302.57   5580.95  5231    1
x_rand[1]       235.01    0.55  49.34    138.86    206.52    234.61    262.42    336.83  8089    1
x_rand[2]       373.62    1.07  95.15    182.83    319.23    372.21    425.30    567.12  7980    1
attempt           0.00    0.00   0.05      0.00      0.00      0.00      0.00      0.00  8050    1
max_attempts     10.00     NaN   0.00     10.00     10.00     10.00     10.00     10.00   NaN  NaN
lp__         -25082.82    0.03   1.80 -25087.41 -25083.73 -25082.46 -25081.52 -25080.40  3875    1

Samples were drawn using NUTS(diag_e) at Tue Feb  6 00:04:08 2024.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).
print('---------------------------- Go Past Time--------------------------------------------')
[1] "---------------------------- Go Past Time--------------------------------------------"
print(fit_gpt)
Inference for Stan model: anon_model.
4 chains, each with iter=4000; warmup=2000; thin=1; 
post-warmup draws per chain=2000, total post-warmup draws=8000.

                  mean se_mean     sd      2.5%      25%       50%       75%     97.5% n_eff Rhat
mu[1]           306.99    0.02   1.71    303.61    305.9    306.97    308.14    310.35  8255    1
mu[2]           425.10    0.03   2.96    419.39    423.0    425.10    427.11    430.79  8294    1
sigma[1]         67.11    0.02   1.47     64.27     66.1     67.10     68.10     70.03  7296    1
sigma[2]        116.24    0.03   2.64    111.16    114.5    116.21    118.01    121.46  7232    1
nu                2.26    0.00   0.09      2.08      2.2      2.26      2.32      2.45  7418    1
rho               0.42    0.00   0.02      0.38      0.4      0.42      0.43      0.45  8786    1
cov[1,1]       4505.28    2.31 197.26   4130.64   4369.6   4501.91   4637.37   4904.27  7307    1
cov[1,2]       3248.81    2.51 213.71   2842.87   3101.4   3244.96   3391.94   3668.78  7277    1
cov[2,1]       3248.81    2.51 213.71   2842.87   3101.4   3244.96   3391.94   3668.78  7277    1
cov[2,2]      13517.55    7.22 613.53  12357.23  13097.8  13505.59  13925.78  14752.13  7226    1
x_rand[1]       318.77    1.49 134.42    121.89    258.4    309.38    363.21    564.06  8092    1
x_rand[2]       451.19    3.26 294.27    111.69    344.8    430.62    522.20    876.86  8131    1
attempt           0.04    0.00   0.21      0.00      0.0      0.00      0.00      1.00  8093    1
max_attempts     10.00     NaN   0.00     10.00     10.0     10.00     10.00     10.00   NaN  NaN
lp__         -29010.67    0.03   1.76 -29015.10 -29011.6 -29010.35 -29009.40 -29008.25  3994    1

Samples were drawn using NUTS(diag_e) at Tue Feb  6 00:05:58 2024.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).
print('---------------------------- Total Duration--------------------------------------------')
[1] "---------------------------- Total Duration--------------------------------------------"
print(fit_td)
Inference for Stan model: anon_model.
4 chains, each with iter=4000; warmup=2000; thin=1; 
post-warmup draws per chain=2000, total post-warmup draws=8000.

                  mean se_mean     sd      2.5%       25%       50%       75%     97.5% n_eff Rhat
mu[1]           266.87    0.02   1.28    264.39    266.01    266.86    267.75    269.33  5448    1
mu[2]           420.80    0.03   2.46    416.02    419.15    420.76    422.47    425.53  5573    1
sigma[1]         52.62    0.02   1.10     50.47     51.88     52.61     53.36     54.78  4500    1
sigma[2]        100.49    0.03   2.03     96.58     99.11    100.48    101.84    104.53  4579    1
nu                4.28    0.00   0.27      3.79      4.09      4.27      4.46      4.85  5361    1
rho               0.62    0.00   0.01      0.59      0.61      0.62      0.63      0.65  6752    1
cov[1,1]       2769.92    1.73 115.77   2547.56   2691.20   2768.16   2846.87   3000.64  4498    1
cov[1,2]       3278.82    2.57 164.70   2966.61   3164.38   3277.56   3386.73   3608.01  4100    1
cov[2,1]       3278.82    2.57 164.70   2966.61   3164.38   3277.56   3386.73   3608.01  4100    1
cov[2,2]      10102.99    6.04 408.74   9326.88   9822.79  10095.99  10371.05  10925.76  4584    1
x_rand[1]       268.11    0.79  69.98    127.37    228.67    268.19    306.65    407.71  7859    1
x_rand[2]       427.30    1.52 133.15    166.09    351.69    423.23    498.89    698.02  7666    1
attempt           0.01    0.00   0.09      0.00      0.00      0.00      0.00      0.00  8123    1
max_attempts     10.00     NaN   0.00     10.00     10.00     10.00     10.00     10.00   NaN  NaN
lp__         -26591.96    0.03   1.74 -26596.19 -26592.83 -26591.63 -26590.69 -26589.56  3343    1

Samples were drawn using NUTS(diag_e) at Tue Feb  6 00:07:59 2024.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).
print('---------------------------- First Pass Regression Prob.--------------------------------------------')
[1] "---------------------------- First Pass Regression Prob.--------------------------------------------"
print(fit_reg)
Inference for Stan model: anon_model.
4 chains, each with iter=4000; warmup=2000; thin=1; 
post-warmup draws per chain=2000, total post-warmup draws=8000.

                mean se_mean   sd    2.5%     25%     50%     75%   97.5% n_eff Rhat
mu[1]           0.14    0.00 0.00    0.13    0.14    0.14    0.14    0.14  8022    1
mu[2]           0.09    0.00 0.00    0.08    0.08    0.09    0.09    0.09  8135    1
sigma[1]        0.08    0.00 0.00    0.08    0.08    0.08    0.08    0.09  7142    1
sigma[2]        0.05    0.00 0.00    0.04    0.05    0.05    0.05    0.05  7221    1
nu              2.78    0.00 0.18    2.46    2.65    2.77    2.89    3.14  7451    1
rho             0.21    0.00 0.03    0.16    0.19    0.21    0.23    0.27  9921    1
cov[1,1]        0.01    0.00 0.00    0.01    0.01    0.01    0.01    0.01  7143    1
cov[1,2]        0.00    0.00 0.00    0.00    0.00    0.00    0.00    0.00  8463    1
cov[2,1]        0.00    0.00 0.00    0.00    0.00    0.00    0.00    0.00  8463    1
cov[2,2]        0.00    0.00 0.00    0.00    0.00    0.00    0.00    0.00  7208    1
x_rand[1]       0.17    0.00 0.13    0.02    0.10    0.15    0.21    0.42  7885    1
x_rand[2]       0.10    0.00 0.07    0.01    0.06    0.09    0.12    0.25  7935    1
attempt         0.17    0.01 0.46    0.00    0.00    0.00    0.00    1.00  7711    1
max_attempts   10.00     NaN 0.00   10.00   10.00   10.00   10.00   10.00   NaN  NaN
lp__         4545.35    0.03 1.75 4541.07 4544.43 4545.68 4546.62 4547.74  3653    1

Samples were drawn using NUTS(diag_e) at Tue Feb  6 00:09:17 2024.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).
print('---------------------------- First Pass Regression Prob. RANK--------------------------------------------')
[1] "---------------------------- First Pass Regression Prob. RANK--------------------------------------------"
print(fit_reg_rank)
Inference for Stan model: anon_model.
4 chains, each with iter=4000; warmup=2000; thin=1; 
post-warmup draws per chain=2000, total post-warmup draws=8000.

                  mean se_mean       sd      2.5%       25%       50%      75%     97.5% n_eff Rhat
mu[1]          1374.90    0.18    18.98   1337.48   1362.16   1374.73   1387.9   1411.91 11231    1
mu[2]          1804.27    0.11    11.31   1782.23   1796.66   1804.43   1811.9   1826.26 10240    1
sigma[1]        696.99    0.12    12.87    672.39    688.36    696.94    705.4    723.03 10941    1
sigma[2]        418.96    0.08     7.84    403.87    413.54    418.83    424.2    434.92 10853    1
nu              102.55    0.23    23.98     63.02     85.64    100.14    116.5    157.44 10504    1
rho               0.19    0.00     0.03      0.14      0.17      0.19      0.2      0.24 11138    1
cov[1,1]     485954.71  172.11 17956.82 452113.94 473835.62 485723.58 497595.5 522770.76 10885    1
cov[1,2]      54509.75   75.79  7861.98  39206.85  49139.03  54379.02  59723.2  70306.93 10762    1
cov[2,1]      54509.75   75.79  7861.98  39206.85  49139.03  54379.02  59723.2  70306.93 10762    1
cov[2,2]     175589.65   63.18  6576.69 163113.47 171011.52 175420.06 179976.5 189159.61 10834    1
x_rand[1]      1420.17    7.48   659.95    217.73    942.20   1404.35   1864.0   2767.59  7778    1
x_rand[2]      1804.04    4.94   427.24    964.46   1521.76   1803.44   2090.0   2641.59  7493    1
attempt           0.03    0.00     0.18      0.00      0.00      0.00      0.0      1.00  8072    1
max_attempts     10.00     NaN     0.00     10.00     10.00     10.00     10.0     10.00   NaN  NaN
lp__         -20329.53    0.03     1.75 -20333.76 -20330.49 -20329.20 -20328.2 -20327.15  4066    1

Samples were drawn using NUTS(diag_e) at Tue Feb  6 00:12:53 2024.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).
# stan_trace(fit_gd, pars=c("rho", "mu", "sigma", "nu"))
# stan_dens(fit_gd, pars=c("rho", "mu", "sigma", "nu"), separate_chains = TRUE)
# stan_plot(fit_gd, pars=c("rho", "mu", "sigma", "nu"))

# Gaze Duration
stan_trace(fit_gd)
stan_dens(fit_gd, separate_chains = TRUE)
stan_plot(fit_gd)

# Go Past Time
stan_trace(fit_gpt)
stan_dens(fit_gpt, separate_chains = TRUE)
stan_plot(fit_gpt)

# Total Duration
stan_trace(fit_td)
stan_dens(fit_td, separate_chains = TRUE)
stan_plot(fit_td)

# FPReg
stan_trace(fit_reg)
stan_dens(fit_reg, separate_chains = TRUE)
stan_plot(fit_reg)
p1 <- stan_trace(fit_gd, pars = 'rho', inc_warmup = FALSE)
p2 <- stan_dens(fit_gd, pars = 'rho', separate_chains = TRUE)
p3 <- stan_trace(fit_gd, pars = 'mu[1]', inc_warmup = FALSE)
p4 <- stan_dens(fit_gd, pars = 'mu[1]', separate_chains = TRUE)
p5 <- stan_trace(fit_gd, pars = 'mu[2]', inc_warmup = FALSE)
p6 <- stan_dens(fit_gd, pars = 'mu[2]', separate_chains = TRUE)
p7 <- stan_trace(fit_gd, pars = 'sigma[1]', inc_warmup = FALSE)
p8 <- stan_dens(fit_gd, pars = 'sigma[1]', separate_chains = TRUE)
p9 <- stan_trace(fit_gd, pars = 'sigma[2]', inc_warmup = FALSE)
p10 <- stan_dens(fit_gd, pars = 'sigma[2]', separate_chains = TRUE)
p11 <- stan_trace(fit_gd, pars = 'nu', inc_warmup = FALSE)
p12 <- stan_dens(fit_gd, pars = 'nu', separate_chains = TRUE)


# Use grid.arrange() to arrange the plots
# grid.arrange(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, ncol=2, nrow=6)
print('---------------------------- Gaze Duration--------------------------------------------')
[1] "---------------------------- Gaze Duration--------------------------------------------"
rho_gd = as.numeric(extract(fit_gd, "rho")[[1]])
mean = mean(rho_gd)
crI = quantile(rho_gd, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_gd), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.5121
HPD: [0.4784, 0.5441]
crI: [0.4789, 0.5448]
print('---------------------------- Go Past Time--------------------------------------------')
[1] "---------------------------- Go Past Time--------------------------------------------"
rho_gpt = as.numeric(extract(fit_gpt, "rho")[[1]])
mean = mean(rho_gpt)
crI = quantile(rho_gpt, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_gpt), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.4163
HPD: [0.3793, 0.455]
crI: [0.378, 0.4539]
print('---------------------------- Total Duration--------------------------------------------')
[1] "---------------------------- Total Duration--------------------------------------------"
rho_td = as.numeric(extract(fit_td, "rho")[[1]])
mean = mean(rho_td)
crI = quantile(rho_td, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_td), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.6197
HPD: [0.5915, 0.6471]
crI: [0.5912, 0.647]
print('---------------------------- First Pass Regression --------------------------------------------')
[1] "---------------------------- First Pass Regression --------------------------------------------"
# rho_reg = as.numeric(extract(fit_reg, "rho[1, 2]")[[1]])
rho_reg = as.numeric(extract(fit_reg, "rho")[[1]])
mean = mean(rho_reg)
crI = quantile(rho_reg, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_reg), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.2139
HPD: [0.1561, 0.2715]
crI: [0.1554, 0.2711]
print('---------------------------- First Pass Regression RANK--------------------------------------------')
[1] "---------------------------- First Pass Regression RANK--------------------------------------------"
# rho_reg = as.numeric(extract(fit_reg, "rho[1, 2]")[[1]])
rho_reg = as.numeric(extract(fit_reg_rank, "rho")[[1]])
mean = mean(rho_reg)
crI = quantile(rho_reg, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_reg), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]")
Mean: 0.1866
HPD: [0.1359, 0.2356]
crI: [0.1367, 0.2367]
print('---------------------------- Gaze Duration--------------------------------------------')
gd_rand <- extract(fit_gd, "x_rand")[[1]]
# x_rand_filtered <- x_rand[apply(x_rand, 1, function(x) all(x > 0)),]
# x_rand_filtered

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 400), ylim=c(0, 700), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(gd_rand[,1], gd_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(gd_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(gd_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(gd_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- Go Past Time--------------------------------------------')
gpt_rand <- extract(fit_gpt, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 1200), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "Go Past Time") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(gpt_rand[,1], gpt_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(gpt_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(gpt_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(gpt_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- Total Duration--------------------------------------------')
td_rand <- extract(fit_td, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 1200), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "Total Duration") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(td_rand[,1], td_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(td_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(td_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(td_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- First Pass Regression --------------------------------------------')
reg_rand <- extract(fit_reg, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(reg_rand[,1], reg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(reg_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(reg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(reg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

model motr eyetr FPReg correlation (eyetr < 0.3)

print("First Pass Regression Prob. all and  < 0.3")
[1] "First Pass Regression Prob. all and  < 0.3"
reg_df_all = provo_df %>% filter(metric == "FPReg") %>% 
  spread(measure, value) %>%
  # filter(eyetr_value > 0, motr_value > 0) %>%
  mutate(eyetr_value =  pmax(eyetr_value, 1e-5),
         motr_value = pmax(motr_value, 1e-5))

reg_df_low_drop0 = provo_df %>% filter(metric == "FPReg") %>% 
  spread(measure, value) %>%
  filter(eyetr_value > 0, motr_value > 0) %>%
  mutate(eyetr_value =  pmax(eyetr_value, 1e-5),
         motr_value = pmax(motr_value, 1e-5)) %>%
  filter(eyetr_value < 0.3)

reg_df_low = provo_df %>% filter(metric == "FPReg") %>% 
  spread(measure, value) %>%
  # filter(eyetr_value > 0, motr_value > 0) %>%
  mutate(eyetr_value =  pmax(eyetr_value, 1e-5),
         motr_value = pmax(motr_value, 1e-5)) %>%
  filter(eyetr_value < 0.3)
  # mutate(eyetr_value = exp(eyetr_value),
         # motr_value = exp(motr_value)
         # )
# View(reg_df)

print(cor.test(reg_df_all$eyetr_value, reg_df_all$motr_value)$estimate)
   cor 
0.3047 
print(cor.test(reg_df_all$eyetr_value, reg_df_all$motr_value)$p.value)
[1] 0.000000000000000000000000000000000000000000000000000000123
print(cor.test(reg_df_low$eyetr_value, reg_df_low$motr_value)$estimate)
   cor 
0.1065 
print(cor.test(reg_df_low$eyetr_value, reg_df_low$motr_value)$p.value)
[1] 0.0000003928
print(cor.test(reg_df_low_drop0$eyetr_value, reg_df_low_drop0$motr_value)$estimate)
    cor 
0.09068 
print(cor.test(reg_df_low_drop0$eyetr_value, reg_df_low_drop0$motr_value)$p.value)
[1] 0.001411
# View(reg_df)
reg_df_low %>% 
  gather(measure, value, 12:13) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

reg_temp_all <- reg_df_all[c("eyetr_value", "motr_value")] %>% data.matrix()
reg_temp_low <- reg_df_low[c("eyetr_value", "motr_value")] %>% data.matrix()
reg_temp_low_drop0 <- reg_df_low_drop0[c("eyetr_value", "motr_value")] %>% data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))

# Plot the first data matrix td_temp
plot(reg_temp_low, pch = 16, col = "blue",
     main = "Not Log-Transformed")

# -------fit model FPReg < 0.3 ----------
reg_data = list(x=reg_temp_all, N=nrow(reg_temp_all))
fit_reg = stan(
  # file="stan_models/bivariate_beta_correlation_reg.stan", 
  file = "stan_models/bivariate_normal_reg.stan",
  data=reg_data, 
  iter=4000, 
  chains=4, 
  cores=4,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
fit_reg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_reg, file = paste0("./bayesian_models/bayesian_models_correlation/motr_eyetr_FPReg_cor_all_data_drop0s.rds"))

model motr eyetr FPReg correlation (eyetr >= 0.3)

print("First Pass Regression Prob. >= 0.3")
[1] "First Pass Regression Prob. >= 0.3"
reg_df_high_drop0 = provo_df %>% filter(metric == "FPReg") %>% 
  spread(measure, value) %>%
  filter(eyetr_value > 0, motr_value > 0) %>%
  mutate(eyetr_value =  pmax(eyetr_value, 1e-5),
         motr_value = pmax(motr_value, 1e-5)) %>%
  filter(eyetr_value >= 0.3)

reg_df_high = provo_df %>% filter(metric == "FPReg") %>% 
  spread(measure, value) %>%
  # filter(eyetr_value > 0, motr_value > 0) %>%
  mutate(eyetr_value =  pmax(eyetr_value, 1e-5),
         motr_value = pmax(motr_value, 1e-5)) %>%
  filter(eyetr_value >= 0.3)
  # mutate(eyetr_value = exp(eyetr_value),
         # motr_value = exp(motr_value)
         # )
# View(reg_df)

print(cor.test(reg_df_high$eyetr_value, reg_df_high$motr_value)$estimate)
   cor 
0.4199 
print(cor.test(reg_df_high$eyetr_value, reg_df_high$motr_value)$p.value)
[1] 0.0000000000002953
print(cor.test(reg_df_high_drop0$eyetr_value, reg_df_high_drop0$motr_value)$estimate)
   cor 
0.4889 
print(cor.test(reg_df_high_drop0$eyetr_value, reg_df_high_drop0$motr_value)$p.value)
[1] 0.000000000001436
# View(reg_df)
reg_df_high %>% 
  gather(measure, value, 12:13) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

reg_temp_high <- reg_df_high[c("eyetr_value", "motr_value")] %>% data.matrix()
reg_temp_high_drop0 <- reg_df_high_drop0[c("eyetr_value", "motr_value")] %>% data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 3))

# Plot the first data matrix td_temp
plot(reg_temp_all, pch = 16, col = "blue",
     main = "FPReg not logged all data")
plot(reg_temp_low, pch = 16, col = "blue",
     main = "FPReg not logged eyetr < 0.3 ")
plot(reg_temp_high, pch = 16, col = "blue",
     main = "FPReg not logged eyetr >= 0.3")

# -------fit model FPReg >= 0.3 ----------
reg_data = list(x=reg_temp_high_drop0, N=nrow(reg_temp_high_drop0))
fit_reg = stan(
  # file="stan_models/bivariate_beta_correlation_reg.stan", 
  file = "stan_models/bivariate_normal_reg.stan",
  data=reg_data, 
  iter=4000, 
  chains=4, 
  cores=4,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
fit_reg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_reg, file = paste0(".//bayesian_models/bayesian_models_correlation/motr_eyetr_FPReg_cor_03-1_drop0s.rds"))
fit_mreg_all = readRDS("./bayesian_models/bayesian_models_correlation_2023/motr_eyetr_FPReg_cor_all_data.rds")
fit_mreg_all_drop0 = readRDS("./bayesian_models/bayesian_models_correlation_2023/motr_eyetr_FPReg_cor_all_data_drop0s.rds")
fit_mreg_low = readRDS("./bayesian_models/bayesian_models_correlation_2023/motr_eyetr_FPReg_cor_00-03.rds")
fit_mreg_low_drop0 = readRDS("./bayesian_models/bayesian_models_correlation_2023/motr_eyetr_FPReg_cor_00-03_drop0s.rds")
fit_mreg_high = readRDS("./bayesian_models/bayesian_models_correlation_2023/motr_eyetr_FPReg_cor_03-1.rds")
fit_mreg_high_drop0 = readRDS("./bayesian_models/bayesian_models_correlation_2023/motr_eyetr_FPReg_cor_03-1_drop0s.rds")

print('---------------------------- First Pass Regression Prob. all data --------------------------------------------')
[1] "---------------------------- First Pass Regression Prob. all data --------------------------------------------"
print(fit_mreg_all)
Inference for Stan model: bivariate_normal_reg.
4 chains, each with iter=4000; warmup=2000; thin=1; 
post-warmup draws per chain=2000, total post-warmup draws=8000.

                mean se_mean   sd    2.5%     25%     50%     75%   97.5% n_eff Rhat
mu[1]           0.12    0.00 0.00    0.12    0.12    0.12    0.13    0.13  5811    1
mu[2]           0.03    0.00 0.00    0.02    0.03    0.03    0.03    0.03  3384    1
sigma[1]        0.08    0.00 0.00    0.07    0.07    0.08    0.08    0.08  3694    1
sigma[2]        0.05    0.00 0.00    0.05    0.05    0.05    0.05    0.06  3040    1
nu              2.42    0.00 0.14    2.16    2.33    2.42    2.52    2.71  3250    1
rho             0.16    0.00 0.02    0.11    0.14    0.16    0.17    0.20  8176    1
cov[1,1]        0.01    0.00 0.00    0.01    0.01    0.01    0.01    0.01  3693    1
cov[1,2]        0.00    0.00 0.00    0.00    0.00    0.00    0.00    0.00  6348    1
cov[2,1]        0.00    0.00 0.00    0.00    0.00    0.00    0.00    0.00  6348    1
cov[2,2]        0.00    0.00 0.00    0.00    0.00    0.00    0.00    0.00  3053    1
x_rand[1]       0.16    0.00 0.12    0.02    0.09    0.14    0.20    0.40  7811    1
x_rand[2]       0.07    0.00 0.08    0.00    0.03    0.05    0.09    0.25  7966    1
attempt         0.63    0.01 1.03    0.00    0.00    0.00    1.00    3.00  7919    1
max_attempts   10.00     NaN 0.00   10.00   10.00   10.00   10.00   10.00   NaN  NaN
lp__         7450.58    0.03 1.78 7446.22 7449.65 7450.92 7451.89 7452.98  3519    1

Samples were drawn using NUTS(diag_e) at Sat Aug  5 23:08:09 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).
print('---------------------------- First Pass Regression Prob. all data no 0s--------------------------------------------')
[1] "---------------------------- First Pass Regression Prob. all data no 0s--------------------------------------------"
print(fit_mreg_all_drop0)
Inference for Stan model: bivariate_normal_reg.
4 chains, each with iter=4000; warmup=2000; thin=1; 
post-warmup draws per chain=2000, total post-warmup draws=8000.

                mean se_mean   sd    2.5%     25%     50%     75%   97.5% n_eff Rhat
mu[1]           0.15    0.00 0.00    0.14    0.15    0.15    0.15    0.16  8507    1
mu[2]           0.14    0.00 0.00    0.13    0.14    0.14    0.14    0.14  7923    1
sigma[1]        0.09    0.00 0.00    0.08    0.08    0.09    0.09    0.09  7186    1
sigma[2]        0.06    0.00 0.00    0.06    0.06    0.06    0.06    0.06  6227    1
nu              2.78    0.00 0.23    2.37    2.62    2.77    2.93    3.26  7008    1
rho             0.18    0.00 0.04    0.11    0.16    0.18    0.21    0.26  9833    1
cov[1,1]        0.01    0.00 0.00    0.01    0.01    0.01    0.01    0.01  7173    1
cov[1,2]        0.00    0.00 0.00    0.00    0.00    0.00    0.00    0.00  8599    1
cov[2,1]        0.00    0.00 0.00    0.00    0.00    0.00    0.00    0.00  8599    1
cov[2,2]        0.00    0.00 0.00    0.00    0.00    0.00    0.00    0.00  6234    1
x_rand[1]       0.18    0.00 0.13    0.02    0.10    0.16    0.22    0.43  7699    1
x_rand[2]       0.15    0.00 0.08    0.03    0.10    0.14    0.18    0.34  8175    1
attempt         0.16    0.00 0.43    0.00    0.00    0.00    0.00    1.00  7917    1
max_attempts   10.00     NaN 0.00   10.00   10.00   10.00   10.00   10.00   NaN  NaN
lp__         2566.44    0.03 1.75 2562.18 2565.53 2566.76 2567.72 2568.88  3487    1

Samples were drawn using NUTS(diag_e) at Sat Aug  5 23:18:11 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).
print('---------------------------- First Pass Regression Prob.< 0.3--------------------------------------------')
[1] "---------------------------- First Pass Regression Prob.< 0.3--------------------------------------------"
print(fit_mreg_low)
Inference for Stan model: bivariate_normal_reg.
4 chains, each with iter=4000; warmup=2000; thin=1; 
post-warmup draws per chain=2000, total post-warmup draws=8000.

                mean se_mean   sd    2.5%     25%     50%     75%   97.5% n_eff Rhat
mu[1]           0.12    0.00 0.00    0.11    0.12    0.12    0.12    0.12  8566    1
mu[2]           0.03    0.00 0.00    0.03    0.03    0.03    0.04    0.04  4981    1
sigma[1]        0.06    0.00 0.00    0.06    0.06    0.06    0.06    0.07  7280    1
sigma[2]        0.06    0.00 0.00    0.06    0.06    0.06    0.06    0.07  4552    1
nu              5.54    0.01 0.51    4.62    5.19    5.51    5.86    6.62  4637    1
rho             0.10    0.00 0.02    0.06    0.09    0.10    0.12    0.15  8596    1
cov[1,1]        0.00    0.00 0.00    0.00    0.00    0.00    0.00    0.00  7299    1
cov[1,2]        0.00    0.00 0.00    0.00    0.00    0.00    0.00    0.00  8259    1
cov[2,1]        0.00    0.00 0.00    0.00    0.00    0.00    0.00    0.00  8259    1
cov[2,2]        0.00    0.00 0.00    0.00    0.00    0.00    0.00    0.00  4571    1
x_rand[1]       0.13    0.00 0.07    0.02    0.08    0.12    0.17    0.28  8277    1
x_rand[2]       0.07    0.00 0.06    0.00    0.03    0.06    0.10    0.21  7280    1
attempt         0.53    0.01 0.90    0.00    0.00    0.00    1.00    3.00  8006    1
max_attempts   10.00     NaN 0.00   10.00   10.00   10.00   10.00   10.00   NaN  NaN
lp__         7794.82    0.03 1.78 7790.50 7793.88 7795.16 7796.13 7797.26  3232    1

Samples were drawn using NUTS(diag_e) at Sat Aug  5 20:52:03 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).
print('---------------------------- First Pass Regression Prob.< 0.3 no 0s--------------------------------------------')
[1] "---------------------------- First Pass Regression Prob.< 0.3 no 0s--------------------------------------------"
print(fit_mreg_low_drop0)
Inference for Stan model: bivariate_normal_reg.
4 chains, each with iter=4000; warmup=2000; thin=1; 
post-warmup draws per chain=2000, total post-warmup draws=8000.

                mean se_mean   sd    2.5%     25%     50%     75%   97.5% n_eff Rhat
mu[1]           0.13    0.00 0.00    0.13    0.13    0.13    0.13    0.14 10534    1
mu[2]           0.13    0.00 0.00    0.13    0.13    0.13    0.14    0.14  8839    1
sigma[1]        0.06    0.00 0.00    0.06    0.06    0.06    0.06    0.07  8534    1
sigma[2]        0.06    0.00 0.00    0.06    0.06    0.06    0.07    0.07  7122    1
nu              6.50    0.01 0.96    4.90    5.83    6.40    7.06    8.73  6790    1
rho             0.07    0.00 0.04    0.00    0.05    0.07    0.10    0.15 10356    1
cov[1,1]        0.00    0.00 0.00    0.00    0.00    0.00    0.00    0.00  8526    1
cov[1,2]        0.00    0.00 0.00    0.00    0.00    0.00    0.00    0.00 10435    1
cov[2,1]        0.00    0.00 0.00    0.00    0.00    0.00    0.00    0.00 10435    1
cov[2,2]        0.00    0.00 0.00    0.00    0.00    0.00    0.00    0.00  7123    1
x_rand[1]       0.14    0.00 0.07    0.02    0.09    0.13    0.18    0.28  8031    1
x_rand[2]       0.14    0.00 0.07    0.02    0.10    0.14    0.18    0.29  8006    1
attempt         0.08    0.00 0.30    0.00    0.00    0.00    0.00    1.00  7642    1
max_attempts   10.00     NaN 0.00   10.00   10.00   10.00   10.00   10.00   NaN  NaN
lp__         2737.21    0.03 1.76 2732.81 2736.27 2737.56 2738.50 2739.62  3992    1

Samples were drawn using NUTS(diag_e) at Sat Aug  5 20:30:02 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).
print('---------------------------- First Pass Regression Prob.>= 0.3--------------------------------------------')
[1] "---------------------------- First Pass Regression Prob.>= 0.3--------------------------------------------"
print(fit_mreg_high)
Inference for Stan model: bivariate_normal_reg.
4 chains, each with iter=4000; warmup=2000; thin=1; 
post-warmup draws per chain=2000, total post-warmup draws=8000.

               mean se_mean    sd   2.5%    25%    50%    75%  97.5% n_eff Rhat
mu[1]          0.45    0.00  0.01   0.44   0.45   0.45   0.46   0.47  5045    1
mu[2]          0.15    0.00  0.01   0.13   0.15   0.15   0.16   0.18  4628    1
sigma[1]       0.11    0.00  0.01   0.10   0.11   0.11   0.12   0.12  5622    1
sigma[2]       0.15    0.00  0.01   0.13   0.15   0.15   0.16   0.17  4499    1
nu            25.71    0.18 12.83   9.59  16.53  22.89  31.70  58.03  5109    1
rho            0.41    0.00  0.06   0.30   0.37   0.41   0.45   0.51  7137    1
cov[1,1]       0.01    0.00  0.00   0.01   0.01   0.01   0.01   0.02  5625    1
cov[1,2]       0.01    0.00  0.00   0.00   0.01   0.01   0.01   0.01  4842    1
cov[2,1]       0.01    0.00  0.00   0.00   0.01   0.01   0.01   0.01  4842    1
cov[2,2]       0.02    0.00  0.00   0.02   0.02   0.02   0.03   0.03  4553    1
x_rand[1]      0.47    0.00  0.11   0.25   0.39   0.47   0.54   0.70  7964    1
x_rand[2]      0.20    0.00  0.13   0.01   0.10   0.19   0.28   0.49  7829    1
attempt        0.19    0.01  0.47   0.00   0.00   0.00   0.00   1.00  7881    1
max_attempts  10.00     NaN  0.00  10.00  10.00  10.00  10.00  10.00   NaN  NaN
lp__         579.26    0.03  1.75 575.10 578.35 579.56 580.56 581.68  3529    1

Samples were drawn using NUTS(diag_e) at Sat Aug  5 22:25:38 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).
print('---------------------------- First Pass Regression Prob.>= 0.3 no 0s--------------------------------------------')
[1] "---------------------------- First Pass Regression Prob.>= 0.3 no 0s--------------------------------------------"
print(fit_mreg_high_drop0)
Inference for Stan model: bivariate_normal_reg.
4 chains, each with iter=4000; warmup=2000; thin=1; 
post-warmup draws per chain=2000, total post-warmup draws=8000.

               mean se_mean    sd   2.5%    25%    50%    75%  97.5% n_eff Rhat
mu[1]          0.48    0.00  0.01   0.46   0.48   0.48   0.49   0.51  6409    1
mu[2]          0.27    0.00  0.01   0.25   0.26   0.27   0.28   0.30  6397    1
sigma[1]       0.12    0.00  0.01   0.11   0.12   0.12   0.13   0.14  6555    1
sigma[2]       0.16    0.00  0.01   0.14   0.15   0.16   0.17   0.18  6281    1
nu            32.85    0.16 15.27  11.84  21.65  30.12  40.80  69.54  8953    1
rho            0.51    0.00  0.07   0.37   0.47   0.52   0.56   0.64  6986    1
cov[1,1]       0.02    0.00  0.00   0.01   0.01   0.02   0.02   0.02  6491    1
cov[1,2]       0.01    0.00  0.00   0.01   0.01   0.01   0.01   0.02  5145    1
cov[2,1]       0.01    0.00  0.00   0.01   0.01   0.01   0.01   0.02  5145    1
cov[2,2]       0.03    0.00  0.00   0.02   0.02   0.03   0.03   0.03  6242    1
x_rand[1]      0.49    0.00  0.13   0.25   0.41   0.49   0.57   0.74  8129    1
x_rand[2]      0.29    0.00  0.15   0.04   0.18   0.28   0.38   0.60  7467    1
attempt        0.06    0.00  0.24   0.00   0.00   0.00   0.00   1.00  7913    1
max_attempts  10.00     NaN  0.00  10.00  10.00  10.00  10.00  10.00   NaN  NaN
lp__         300.28    0.03  1.75 296.05 299.35 300.60 301.58 302.70  3555    1

Samples were drawn using NUTS(diag_e) at Sat Aug  5 22:31:08 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).
# # FPReg all data
# stan_trace(fit_mreg_all)
# stan_dens(fit_mreg_all, separate_chains = TRUE)
# stan_plot(fit_mreg_all)

# stan_trace(fit_mreg_all_drop0)
# stan_dens(fit_mreg_all_drop0, separate_chains = TRUE)
# stan_plot(fit_mreg_all_drop0)

# # FPReg < 0.3
# stan_trace(fit_mreg_low)
# stan_dens(fit_mreg_low, separate_chains = TRUE)
# stan_plot(fit_mreg_low)
# 
# stan_trace(fit_mreg_low_drop0)
# stan_dens(fit_mreg_low_drop0, separate_chains = TRUE)
# stan_plot(fit_mreg_low_drop0)

# FPReg > 0.3
stan_trace(fit_mreg_high)
'pars' not specified. Showing first 10 parameters by default.

stan_dens(fit_mreg_high, separate_chains = TRUE)
'pars' not specified. Showing first 10 parameters by default.

stan_plot(fit_mreg_high)
'pars' not specified. Showing first 10 parameters by default.
ci_level: 0.8 (80% intervals)
outer_level: 0.95 (95% intervals)

stan_trace(fit_mreg_high_drop0)
'pars' not specified. Showing first 10 parameters by default.

stan_dens(fit_mreg_high_drop0, separate_chains = TRUE)
'pars' not specified. Showing first 10 parameters by default.

stan_plot(fit_mreg_high_drop0)
'pars' not specified. Showing first 10 parameters by default.
ci_level: 0.8 (80% intervals)
outer_level: 0.95 (95% intervals)

print('---------------------------- First Pass Regression all data--------------------------------------------')
[1] "---------------------------- First Pass Regression all data--------------------------------------------"
rho_mreg_all = as.numeric(extract(fit_mreg_all, "rho")[[1]])
mean = mean(rho_mreg_all)
crI = quantile(rho_mreg_all, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mreg_all), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.1574
HPD: [0.1139, 0.2017]
crI: [0.1134, 0.2014]
print('---------------------------- First Pass Regression all data no 0s--------------------------------------------')
[1] "---------------------------- First Pass Regression all data no 0s--------------------------------------------"
rho_mreg_all_drop0 = as.numeric(extract(fit_mreg_all_drop0, "rho")[[1]])
mean = mean(rho_mreg_all_drop0)
crI = quantile(rho_mreg_all_drop0, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mreg_all_drop0), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.1843
HPD: [0.1106, 0.2573]
crI: [0.1114, 0.2581]
print('---------------------------- First Pass Regression < 0.3--------------------------------------------')
[1] "---------------------------- First Pass Regression < 0.3--------------------------------------------"
rho_mreg_low = as.numeric(extract(fit_mreg_low, "rho")[[1]])
mean = mean(rho_mreg_low)
crI = quantile(rho_mreg_low, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mreg_low), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.103
HPD: [0.05775, 0.1468]
crI: [0.05805, 0.1475]
print('---------------------------- First Pass Regression < 0.3 no 0s--------------------------------------------')
[1] "---------------------------- First Pass Regression < 0.3 no 0s--------------------------------------------"
rho_mreg_low_drop0 = as.numeric(extract(fit_mreg_low_drop0, "rho")[[1]])
mean = mean(rho_mreg_low_drop0)
crI = quantile(rho_mreg_low_drop0, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mreg_low_drop0), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.07362
HPD: [-0.000616, 0.1514]
crI: [-0.003759, 0.1491]
print('---------------------------- First Pass Regression >= 0.3--------------------------------------------')
[1] "---------------------------- First Pass Regression >= 0.3--------------------------------------------"
rho_mreg_high = as.numeric(extract(fit_mreg_high, "rho")[[1]])
mean = mean(rho_mreg_high)
crI = quantile(rho_mreg_high, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mreg_high), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.4102
HPD: [0.3001, 0.5155]
crI: [0.2992, 0.5146]
print('---------------------------- First Pass Regression >= 0.3 no 0s--------------------------------------------')
[1] "---------------------------- First Pass Regression >= 0.3 no 0s--------------------------------------------"
rho_mreg_high_drop0 = as.numeric(extract(fit_mreg_high_drop0, "rho")[[1]])
mean = mean(rho_mreg_high_drop0)
crI = quantile(rho_mreg_high_drop0, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mreg_high_drop0), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.514
HPD: [0.3797, 0.6456]
crI: [0.3728, 0.6412]
print('---------------------------- First Pass Regression all data --------------------------------------------')
mallreg_rand <- extract(fit_mreg_all, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(mallreg_rand[,1], mallreg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(reg_temp_all, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(mallreg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mallreg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- First Pass Regression all data no 0s--------------------------------------------')
mallreg_rand_drop0 <- extract(fit_mreg_all_drop0, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(mallreg_rand_drop0[,1], mallreg_rand_drop0[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(reg_temp_all, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(mallreg_rand_drop0, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mallreg_rand_drop0, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- First Pass Regression < 0.3 --------------------------------------------')
mlowreg_rand <- extract(fit_mreg_low, "x_rand")[[1]]
print(mlowreg_rand)
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(mlowreg_rand[,1], mlowreg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(reg_temp_low, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(mlowreg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mlowreg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- First Pass Regression < 0.3 no 0s --------------------------------------------')
mlowreg_rand_drop0 <- extract(fit_mreg_low_drop0, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(mlowreg_rand_drop0[,1], mlowreg_rand_drop0[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(reg_temp_low_drop0, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(mlowreg_rand_drop0, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mlowreg_rand_drop0, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- First Pass Regression >= 0.3 --------------------------------------------')
mhighreg_rand_samples <- extract(fit_mreg_high, "x_rand")[[1]]
# print(mhighreg_rand_samples)
selected_indices <- sample(1:nrow(mhighreg_rand_samples), 900)
mhighreg_rand <- mhighreg_rand_samples[selected_indices, ]
# mhighreg_rand <- extract(fit_mreg_high, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(mhighreg_rand[,1], mhighreg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(reg_temp_high, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(mhighreg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mhighreg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

# print('---------------------------- First Pass Regression >= 0.3 no 0s --------------------------------------------')
mhighreg_rand_drop0_samples <- extract(fit_mreg_high_drop0, "x_rand")[[1]]
selected_indices <- sample(1:nrow(mhighreg_rand_drop0_samples), 900)
mhighreg_rand_drop0 <- mhighreg_rand_drop0_samples[selected_indices, ]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color
points(mhighreg_rand_drop0[,1], mhighreg_rand_drop0[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(reg_temp_high_drop0, pch=16, col="red")

# add dataEllipse with color
dataEllipse(mhighreg_rand_drop0, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mhighreg_rand_drop0, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

model eye tracking to eye tracking correlation


print("Gaze Duration")
[1] "Gaze Duration"
# View(provo_eyetr_grouped_df)

egd_df = provo_eyetr_grouped_df %>% filter(metric == "gaze_duration") %>% #group_by(text_id, metric, measure) %>%
  # summarize(value = mean(value, na.rm = TRUE), .groups = 'drop') %>%
    filter(text_id != 18) %>%
    spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value_1 =  pmax(value_1, 1),
         eyetr_value_2 = pmax(value_2, 1)
  ) 
print(cor.test(egd_df$eyetr_value_1, egd_df$eyetr_value_2)$estimate)
   cor 
0.9093 
# View(egd_df)

egd_df %>% 
  gather(measure, value, 5:6) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

egd_temp <- egd_df[c("eyetr_value_1", "eyetr_value_2")] %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))

# Plot the first data matrix gd_temp
plot(egd_temp, pch = 16, col = "blue",
     main = "Not Log-Transformed")

egd_data = list(x=egd_temp, N=nrow(egd_temp))

fit_egd = stan(
  file="stan_models/bivariate_correlation.stan", 
  data=egd_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
fit_egd@stanmodel@dso <- new("cxxdso")
saveRDS(fit_egd, file = paste0("./bayesian_models/bayesian_models_correlation/cor_bivariate/eyetr_eyetr_gaze_duration_cor.rds"))
print("Go Past Time")
[1] "Go Past Time"
egpt_df = provo_eyetr_grouped_df %>% filter(metric == "go_past_time") %>% #group_by(text_id, metric, measure) %>%
  # summarize(value = mean(value, na.rm = TRUE), .groups = 'drop') %>%
  filter(text_id != 18) %>%
    spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value_1 =  pmax(value_1, 1),
         eyetr_value_2 = pmax(value_2, 1)
  ) 
print(cor.test(egpt_df$eyetr_value_1, egpt_df$eyetr_value_2)$estimate)
   cor 
0.8481 
# View(egd_df)

egpt_df %>% 
  gather(measure, value, 5:6) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

egpt_temp <- egpt_df[c("eyetr_value_1", "eyetr_value_2")] %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))

# Plot the first data matrix gd_temp
plot(egpt_temp, pch = 16, col = "blue",
     main = "Not Log-Transformed")

egpt_data = list(x=egpt_temp, N=nrow(egpt_temp))

fit_egpt = stan(
  file="stan_models/bivariate_correlation.stan", 
  data=egpt_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
fit_egpt@stanmodel@dso <- new("cxxdso")
saveRDS(fit_egpt, file = paste0("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_go_past_time_cor.rds"))
print("Total Duration")
[1] "Total Duration"
etd_df = provo_eyetr_grouped_df %>% filter(metric == "total_duration") %>% #group_by(text_id, metric, measure) %>%
  # summarize(value = mean(value, na.rm = TRUE), .groups = 'drop') %>%
  filter(text_id != 18) %>%
    spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value_1 =  pmax(value_1, 1),
         eyetr_value_2 = pmax(value_2, 1)
  ) 
print(cor.test(etd_df$eyetr_value_1, etd_df$eyetr_value_2)$estimate)
   cor 
0.9205 
# View(egd_df)

etd_df %>% 
  gather(measure, value, 5:6) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

etd_temp <- etd_df[c("eyetr_value_1", "eyetr_value_2")] %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))

# Plot the first data matrix gd_temp
plot(etd_temp, pch = 16, col = "blue",
     main = "Total Duration Not Log-Transformed")

etd_data = list(x=etd_temp, N=nrow(etd_temp))

fit_etd = stan(
  file="stan_models/bivariate_correlation.stan", 
  data=etd_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  # verbose = FALSE
  )

# Save the model 
fit_etd@stanmodel@dso <- new("cxxdso")
saveRDS(fit_etd, file = paste0("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_total_duration_cor.rds"))
print("Fisrt Pass Regression Prob.")
[1] "Fisrt Pass Regression Prob."
ereg_df = provo_eyetr_grouped_df %>% filter(metric == "FPReg") %>% distinct() %>% #group_by(text_id, metric, measure) %>%
  # summarize(value = mean(value)) %>%
  # filter(!(row_number() %in% c(443, 444, 445, 446))) %>%
  filter(text_id != 18) %>%
    spread(measure, value) %>%
  
  # ==== for normal data drop0s ====
  # filter(value_1 > 0, value_2 > 0) %>%
  # mutate(eyetr_value_1 =  pmax(value_1, 1e-5),
  #        eyetr_value_2 = pmax(value_2, 1e-5)
  # )
  
  # ==== for ranking the data drop0s ====
    mutate(
    eyetr_value_1 = ifelse(value_1 > 0, rank(value_1), NA),
    eyetr_value_2 = ifelse(value_2 > 0, rank(value_2), NA)
  ) %>%
  drop_na()

  # ==== for ranking the data w/ 0s ====
  #   mutate(
  #   eyetr_value_1 = rank(value_1, ties.method = "average"),
  #   eyetr_value_2 = rank(value_2, ties.method = "average")
  # )


print(cor.test(ereg_df$eyetr_value_1, ereg_df$eyetr_value_2)$estimate)
   cor 
0.5809 
# View(egd_df)

ereg_df %>% 
  gather(measure, value, 5:6) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

ereg_temp <- ereg_df[c("eyetr_value_1", "eyetr_value_2")] %>%
  drop_na() %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))

# Plot the first data matrix gd_temp
plot(ereg_temp, pch = 16, col = "blue",
     main = "FPReg Not Log-Transformed")

# -------fit model FPReg ----------

# View(ereg_temp)
ereg_data = list(x=ereg_temp, N=nrow(ereg_temp))
fit_ereg = stan(
  file="stan_models/bivariate_correlation_rank.stan", 
  data=ereg_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99),
  verbose = FALSE
  )

# Save the model 
fit_ereg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_ereg, file = paste0("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_FPReg_cor_rank_drop0s.rds"))
fit_egd = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_gaze_duration_cor_drop0s.rds")
fit_egpt = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_go_past_time_cor_drop0s.rds")
fit_etd = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_total_duration_cor_drop0s.rds")
fit_ereg = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_FPReg_cor.rds")
print('---------------------------- Gaze Duration--------------------------------------------')
[1] "---------------------------- Gaze Duration--------------------------------------------"
print(fit_egd)
Inference for Stan model: anon_model.
4 chains, each with iter=4000; warmup=2000; thin=1; 
post-warmup draws per chain=2000, total post-warmup draws=8000.

                  mean se_mean    sd      2.5%       25%       50%       75%     97.5% n_eff Rhat
mu[1]           234.21    0.01  1.00    232.27    233.54    234.19    234.87    236.20  4635    1
mu[2]           236.90    0.01  0.97    235.02    236.24    236.90    237.54    238.79  4910    1
sigma[1]         41.69    0.02  0.91     39.91     41.08     41.68     42.29     43.51  3619    1
sigma[2]         40.43    0.01  0.85     38.80     39.85     40.42     41.00     42.11  3582    1
nu                4.79    0.01  0.34      4.18      4.55      4.77      5.01      5.49  4490    1
rho               0.75    0.00  0.01      0.73      0.74      0.75      0.75      0.77  5064    1
cov[1,1]       1738.83    1.26 75.69   1593.04   1687.35   1737.16   1788.84   1892.83  3618    1
cov[1,2]       1261.40    1.07 60.61   1144.60   1219.78   1259.97   1301.33   1383.64  3231    1
cov[2,1]       1261.40    1.07 60.61   1144.60   1219.78   1259.97   1301.33   1383.64  3231    1
cov[2,2]       1635.31    1.15 68.73   1505.18   1588.13   1633.89   1680.99   1772.85  3581    1
x_rand[1]       235.04    0.60 53.55    126.81    204.21    234.45    265.21    343.46  8071    1
x_rand[2]       238.08    0.58 52.25    134.49    207.68    237.12    267.21    343.68  8199    1
attempt           0.00    0.00  0.06      0.00      0.00      0.00      0.00      0.00  7607    1
max_attempts     10.00     NaN  0.00     10.00     10.00     10.00     10.00     10.00   NaN  NaN
lp__         -22639.73    0.03  1.75 -22644.12 -22640.68 -22639.40 -22638.44 -22637.35  3653    1

Samples were drawn using NUTS(diag_e) at Mon Feb  5 23:34:23 2024.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).
print('---------------------------- Go Past Time--------------------------------------------')
[1] "---------------------------- Go Past Time--------------------------------------------"
print(fit_egpt)
Inference for Stan model: anon_model.
4 chains, each with iter=4000; warmup=2000; thin=1; 
post-warmup draws per chain=2000, total post-warmup draws=8000.

                  mean se_mean     sd      2.5%       25%       50%       75%     97.5% n_eff Rhat
mu[1]           303.70    0.03   2.02    299.63    302.37    303.70    305.04    307.61  6230    1
mu[2]           302.11    0.02   1.80    298.65    300.89    302.10    303.33    305.69  6262    1
sigma[1]         77.62    0.03   1.77     74.18     76.42     77.61     78.81     81.07  4594    1
sigma[2]         68.55    0.02   1.51     65.64     67.52     68.54     69.56     71.58  4950    1
nu                2.33    0.00   0.10      2.13      2.25      2.32      2.40      2.53  5695    1
rho               0.63    0.00   0.01      0.60      0.62      0.63      0.64      0.66  7305    1
cov[1,1]       6028.02    4.05 274.38   5502.89   5840.25   6022.79   6211.77   6572.65  4595    1
cov[1,2]       3372.29    2.65 178.45   3030.75   3248.64   3367.12   3492.40   3732.72  4544    1
cov[2,1]       3372.29    2.65 178.45   3030.75   3248.64   3367.12   3492.40   3732.72  4544    1
cov[2,2]       4701.89    2.95 207.63   4308.25   4559.35   4698.02   4838.38   5123.15  4963    1
x_rand[1]       316.81    1.63 144.61     99.38    247.09    305.30    366.85    597.63  7836    1
x_rand[2]       312.58    1.38 123.98    111.40    252.31    305.23    357.38    569.45  8117    1
attempt           0.03    0.00   0.18      0.00      0.00      0.00      0.00      1.00  7784    1
max_attempts     10.00     NaN   0.00     10.00     10.00     10.00     10.00     10.00   NaN  NaN
lp__         -26967.65    0.03   1.74 -26971.89 -26968.58 -26967.31 -26966.37 -26965.23  3624    1

Samples were drawn using NUTS(diag_e) at Mon Feb  5 23:38:33 2024.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).
print('---------------------------- Total Duration--------------------------------------------')
[1] "---------------------------- Total Duration--------------------------------------------"
print(fit_etd)
Inference for Stan model: anon_model.
4 chains, each with iter=4000; warmup=2000; thin=1; 
post-warmup draws per chain=2000, total post-warmup draws=8000.

                  mean se_mean     sd      2.5%       25%       50%       75%     97.5% n_eff Rhat
mu[1]           272.77    0.02   1.40    270.02    271.83    272.77    273.69    275.55  4475    1
mu[2]           267.14    0.02   1.28    264.64    266.26    267.12    268.00    269.68  4593    1
sigma[1]         60.01    0.02   1.28     57.55     59.15     60.00     60.86     62.53  3656    1
sigma[2]         54.94    0.02   1.12     52.79     54.18     54.92     55.70     57.19  3673    1
nu                5.27    0.01   0.38      4.58      5.01      5.25      5.51      6.05  4498    1
rho               0.81    0.00   0.01      0.79      0.80      0.81      0.81      0.82  4869    1
cov[1,1]       3603.09    2.54 153.48   3311.44   3498.40   3599.66   3703.53   3909.89  3659    1
cov[1,2]       2670.69    2.11 121.75   2440.79   2585.48   2667.90   2750.53   2920.02  3333    1
cov[2,1]       2670.69    2.11 121.75   2440.79   2585.48   2667.90   2750.53   2920.02  3333    1
cov[2,2]       3019.99    2.04 123.51   2787.14   2935.61   3016.10   3102.72   3271.26  3671    1
x_rand[1]       273.92    0.82  73.68    128.22    230.21    274.02    316.06    420.89  8061    1
x_rand[2]       269.13    0.75  67.33    133.79    228.98    268.90    307.33    404.88  8079    1
attempt           0.00    0.00   0.06      0.00      0.00      0.00      0.00      0.00  7660    1
max_attempts     10.00     NaN   0.00     10.00     10.00     10.00     10.00     10.00   NaN  NaN
lp__         -24232.17    0.03   1.77 -24236.45 -24233.10 -24231.82 -24230.89 -24229.77  3293    1

Samples were drawn using NUTS(diag_e) at Mon Feb  5 23:41:09 2024.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).
print('---------------------------- First Pass Regression Prob.--------------------------------------------')
[1] "---------------------------- First Pass Regression Prob.--------------------------------------------"
print(fit_ereg)
Inference for Stan model: anon_model.
4 chains, each with iter=4000; warmup=2000; thin=1; 
post-warmup draws per chain=2000, total post-warmup draws=8000.

                mean se_mean   sd    2.5%     25%     50%     75%   97.5% n_eff Rhat
mu[1]           0.12    0.00 0.00    0.12    0.12    0.12    0.13    0.13  4915    1
mu[2]           0.13    0.00 0.00    0.12    0.12    0.13    0.13    0.13  5058    1
sigma[1]        0.10    0.00 0.00    0.09    0.09    0.10    0.10    0.10  3600    1
sigma[2]        0.09    0.00 0.00    0.09    0.09    0.09    0.09    0.10  3854    1
nu              3.84    0.00 0.22    3.43    3.69    3.83    3.99    4.30  4433    1
rho             0.67    0.00 0.01    0.65    0.66    0.67    0.68    0.70  5339    1
cov[1,1]        0.01    0.00 0.00    0.01    0.01    0.01    0.01    0.01  3597    1
cov[1,2]        0.01    0.00 0.00    0.01    0.01    0.01    0.01    0.01  3356    1
cov[2,1]        0.01    0.00 0.00    0.01    0.01    0.01    0.01    0.01  3356    1
cov[2,2]        0.01    0.00 0.00    0.01    0.01    0.01    0.01    0.01  3847    1
x_rand[1]       0.16    0.00 0.11    0.02    0.09    0.15    0.21    0.41  7785    1
x_rand[2]       0.16    0.00 0.10    0.02    0.09    0.15    0.21    0.40  8078    1
attempt         0.23    0.01 0.53    0.00    0.00    0.00    0.00    2.00  8328    1
max_attempts   10.00     NaN 0.00   10.00   10.00   10.00   10.00   10.00   NaN  NaN
lp__         7379.08    0.03 1.74 7374.84 7378.15 7379.40 7380.38 7381.48  3314    1

Samples were drawn using NUTS(diag_e) at Mon Feb  5 21:48:17 2024.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).
# stan_trace(fit_egd, pars=c("rho", "mu", "sigma", "nu"))
# stan_dens(fit_egd, pars=c("rho", "mu", "sigma", "nu"), separate_chains = TRUE)
# stan_plot(fit_egd, pars=c("rho", "mu", "sigma", "nu"))

# Gaze Duration
stan_trace(fit_egd)
stan_dens(fit_egd, separate_chains = TRUE)
stan_plot(fit_egd)

# Go Past Time
stan_trace(fit_egpt)
stan_dens(fit_egpt, separate_chains = TRUE)
stan_plot(fit_egpt)

# Total Duration
stan_trace(fit_etd)
stan_dens(fit_etd, separate_chains = TRUE)
stan_plot(fit_etd)

# FPReg
stan_trace(fit_ereg)
stan_dens(fit_ereg, separate_chains = TRUE)
stan_plot(fit_ereg)
print('---------------------------- Gaze Duration--------------------------------------------')
[1] "---------------------------- Gaze Duration--------------------------------------------"
rho_egd = as.numeric(extract(fit_egd, "rho")[[1]])
mean = mean(rho_egd)
crI = quantile(rho_egd, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_egd), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.7479
HPD: [0.7278, 0.7669]
crI: [0.7278, 0.767]
print('---------------------------- Go Past Time--------------------------------------------')
[1] "---------------------------- Go Past Time--------------------------------------------"
rho_egpt = as.numeric(extract(fit_egpt, "rho")[[1]])
mean = mean(rho_egpt)
crI = quantile(rho_egpt, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_egpt), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.6334
HPD: [0.6052, 0.6617]
crI: [0.6044, 0.6612]
print('---------------------------- Total Duration--------------------------------------------')
[1] "---------------------------- Total Duration--------------------------------------------"
rho_etd = as.numeric(extract(fit_etd, "rho")[[1]])
mean = mean(rho_etd)
crI = quantile(rho_etd, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_etd), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.8095
HPD: [0.7939, 0.825]
crI: [0.7937, 0.8248]
print('---------------------------- First Pass Regression --------------------------------------------')
[1] "---------------------------- First Pass Regression --------------------------------------------"
rho_ereg = as.numeric(extract(fit_ereg, "rho")[[1]])
mean = mean(rho_ereg)
crI = quantile(rho_ereg, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_ereg), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]")
Mean: 0.6735
HPD: [0.6487, 0.6993]
crI: [0.6477, 0.6986]
print('---------------------------- Gaze Duration--------------------------------------------')
egd_rand <- extract(fit_egd, "x_rand")[[1]]
# x_rand_filtered <- x_rand[apply(x_rand, 1, function(x) all(x > 0)),]
# x_rand_filtered

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 400), ylim=c(0, 700), type="n",
     xlab = "Eye tracking value 1", ylab = "Eye tracking value 2", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(egd_rand[,1], egd_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(egd_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(egd_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(egd_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- Go Past Time--------------------------------------------')
egpt_rand <- extract(fit_egpt, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 1200), type="n",
     xlab = "Eye tracking value 1", ylab = "Eye tracking value 2", main = "Go Past Time") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(egpt_rand[,1], egpt_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(egpt_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(egpt_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(egpt_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- Total Duration--------------------------------------------')
etd_rand <- extract(fit_etd, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 1200), type="n",
     xlab = "Eye tracking value 1", ylab = "Eye tracking value 2", main = "Total Duration") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(etd_rand[,1], etd_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(etd_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(etd_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(etd_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- First Pass Regression --------------------------------------------')
ereg_rand <- extract(fit_ereg, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value 1", ylab = "Eye tracking value 2", main = "First Pass Regression") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(ereg_rand[,1], ereg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(ereg_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(ereg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(ereg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

Bayesian – correlation between MoTR and word level statistics

print("Log Frequency")
stats_cor_df = provo_df %>% filter(metric == "gaze_duration") %>% spread(measure, value)
print(cor.test(stats_cor_df$motr_value, stats_cor_df$freq)$estimate)
print(cor.test(stats_cor_df$eyetr_value, stats_cor_df$freq)$estimate)

# View(stats_cor_df)
stats_cor_df %>% 
  gather(measure, value, c(7, 13)) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

mfreq_temp <- stats_cor_df[c("motr_value", "freq")] %>%
  data.matrix()
efreq_temp <- stats_cor_df[c("eyetr_value", "freq")] %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix gd_temp
plot(mfreq_temp, pch = 16, col = "blue",
     main = "MoTR RTs and Word Frequency")

# Plot the first data matrix gd_temp
plot(efreq_temp, pch = 16, col = "blue",
     main = "EyeTR RTs and Word Frequency")

motr & frequency

mfreq_data = list(x=mfreq_temp, N=nrow(mfreq_temp))

fit_mfreq = stan(
  file="stan_models/stats_correlation.stan", 
  data=mfreq_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  # verbose = FALSE
  )

# Save the model 
fit_mfreq@stanmodel@dso <- new("cxxdso")
saveRDS(fit_mfreq, file = paste0("./bayesian_models/bayesian_models_correlation/motr_freq_cor.rds"))

eyetr & frequency

efreq_data = list(x=efreq_temp, N=nrow(efreq_temp))

fit_efreq = stan(
  file="stan_models/stats_correlation.stan", 
  data=efreq_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  # verbose = FALSE
  )

# Save the model 
fit_efreq@stanmodel@dso <- new("cxxdso")
saveRDS(fit_efreq, file = paste0("./bayesian_models/bayesian_models_correlation/eyetr_freq_cor.rds"))
print("Length")
stats_cor_df = provo_df %>% filter(metric == "gaze_duration") %>% spread(measure, value)
print(cor.test(stats_cor_df$motr_value, stats_cor_df$len)$estimate)
print(cor.test(stats_cor_df$eyetr_value, stats_cor_df$len)$estimate)

# View(stats_cor_df)
stats_cor_df %>% 
  gather(measure, value, c(9, 13)) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

mlen_temp <- stats_cor_df[c("motr_value", "len")] %>%
  data.matrix()
elen_temp <- stats_cor_df[c("eyetr_value", "len")] %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix gd_temp
plot(mlen_temp, pch = 16, col = "blue",
     main = "MoTR RTs and Word Length")

# Plot the first data matrix gd_temp
plot(elen_temp, pch = 16, col = "blue",
     main = "EyeTR RTs and Word Length")

motr & length

mlen_data = list(x=mlen_temp, N=nrow(mlen_temp))

fit_mlen = stan(
  file="stan_models/stats_correlation_len_normal.stan", 
  data=mlen_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  # verbose = FALSE
  )

# Save the model 
fit_mlen@stanmodel@dso <- new("cxxdso")
saveRDS(fit_mlen, file = paste0("./bayesian_models/bayesian_models_correlation/motr_len_cor.rds"))

eyetr & length

elen_data = list(x=elen_temp, N=nrow(elen_temp))

fit_elen = stan(
  file="stan_models/stats_correlation_len_normal.stan", 
  data=elen_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  # verbose = FALSE
  )

# Save the model 
fit_elen@stanmodel@dso <- new("cxxdso")
saveRDS(fit_elen, file = paste0("./bayesian_models/bayesian_models_correlation/eyetr_len_cor.rds"))
print("Surprisal")
stats_cor_df = provo_df %>% filter(metric == "gaze_duration") %>% spread(measure, value)
print(cor.test(stats_cor_df$motr_value, stats_cor_df$surp)$estimate)
print(cor.test(stats_cor_df$eyetr_value, stats_cor_df$surp)$estimate)

# View(stats_cor_df)
stats_cor_df %>% 
  gather(measure, value, c(8, 13)) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

msurp_temp <- stats_cor_df[c("motr_value", "surp")] %>%
  data.matrix()
esurp_temp <- stats_cor_df[c("eyetr_value", "surp")] %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix gd_temp
plot(msurp_temp, pch = 16, col = "blue",
     main = "MoTR RTs and Surprisal")

# Plot the first data matrix gd_temp
plot(esurp_temp, pch = 16, col = "blue",
     main = "EyeTR RTs and Surprisal")

motr & surprisal

msurp_data = list(x=msurp_temp, N=nrow(msurp_temp))

fit_msurp = stan(
  file="stan_models/stats_correlation.stan", 
  data=msurp_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  # verbose = FALSE
  )

# Save the model 
fit_msurp@stanmodel@dso <- new("cxxdso")
saveRDS(fit_msurp, file = paste0("./bayesian_models/bayesian_models_correlation/motr_surp_cor.rds"))

eyetr & surprisal

esurp_data = list(x=esurp_temp, N=nrow(esurp_temp))

fit_esurp = stan(
  file="stan_models/stats_correlation.stan", 
  data=esurp_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  # verbose = FALSE
  )

# Save the model 
fit_esurp@stanmodel@dso <- new("cxxdso")
saveRDS(fit_esurp, file = paste0("./bayesian_models/bayesian_models_correlation/eyetr_surp_cor.rds"))
fit_mfreq = readRDS("./bayesian_models/bayesian_models_correlation/motr_freq_cor.rds")
fit_efreq = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_freq_cor.rds")
fit_mlen = readRDS("./bayesian_models/bayesian_models_correlation/motr_len_cor.rds")
fit_elen = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_len_cor.rds")
fit_msurp = readRDS("./bayesian_models/bayesian_models_correlation/motr_surp_cor.rds")
fit_esurp = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_surp_cor.rds")

print('---------------------------- MoTR & Log Frequency --------------------------------------------')
print(fit_mfreq)
print('---------------------------- EyeTR & Log Frequency --------------------------------------------')
print(fit_efreq)
print('---------------------------- MoTR & Length --------------------------------------------')
print(fit_mlen)
print('---------------------------- EyeTR & Length --------------------------------------------')
print(fit_elen)
print('---------------------------- MoTR & Surprisal --------------------------------------------')
print(fit_msurp)
print('---------------------------- EyeTR & Surprisal --------------------------------------------')
print(fit_esurp)
# MoTR & Log Freq
stan_trace(fit_mfreq)
stan_dens(fit_mfreq, separate_chains = TRUE)
stan_plot(fit_mfreq)

# EyeTR & Log Freq
stan_trace(fit_efreq)
stan_dens(fit_efreq, separate_chains = TRUE)
stan_plot(fit_efreq)

# MoTR & Len
stan_trace(fit_mlen)
stan_dens(fit_mlen, separate_chains = TRUE)
stan_plot(fit_mlen)

# EyeTR & Len
stan_trace(fit_elen)
stan_dens(fit_elen, separate_chains = TRUE)
stan_plot(fit_elen)

# MoTR & Surprisal
stan_trace(fit_msurp)
stan_dens(fit_msurp, separate_chains = TRUE)
stan_plot(fit_msurp)

# EyeTR & Surprisal
stan_trace(fit_esurp)
stan_dens(fit_esurp, separate_chains = TRUE)
stan_plot(fit_esurp)
print('---------------------------- MoTR & Log Freq --------------------------------------------')
rho_mfreq = as.numeric(extract(fit_mfreq, "rho")[[1]])
mean = mean(rho_mfreq)
crI = quantile(rho_mfreq, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mfreq), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- EyeTR & Log Freq --------------------------------------------')
rho_efreq = as.numeric(extract(fit_efreq, "rho")[[1]])
mean = mean(rho_efreq)
crI = quantile(rho_efreq, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_efreq), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- MoTR & Length --------------------------------------------')
rho_mlen = as.numeric(extract(fit_mlen, "rho")[[1]])
mean = mean(rho_mlen)
crI = quantile(rho_mlen, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mlen), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- EyeTR & Length --------------------------------------------')
rho_elen = as.numeric(extract(fit_elen, "rho")[[1]])
mean = mean(rho_elen)
crI = quantile(rho_elen, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_elen), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- MoTR & Surprisal --------------------------------------------')
rho_msurp = as.numeric(extract(fit_msurp, "rho")[[1]])
mean = mean(rho_msurp)
crI = quantile(rho_msurp, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_msurp), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- EyeTR & Surprisal --------------------------------------------')
rho_esurp = as.numeric(extract(fit_esurp, "rho")[[1]])
mean = mean(rho_esurp)
crI = quantile(rho_esurp, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_esurp), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]")
print('---------------------------- MoTR & Log Frequency--------------------------------------------')
mfreq_rand <- extract(fit_mfreq, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 12), type="n",
     xlab = "MoTR value", ylab = "Log Frequency", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(mfreq_rand[,1], mfreq_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(mfreq_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(mfreq_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mfreq_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- EyeTR & Log Frequency--------------------------------------------')
efreq_rand <- extract(fit_efreq, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 500), ylim=c(0, 12), type="n",
     xlab = "Eye tracking value", ylab = "Log Frequency", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(efreq_rand[,1], efreq_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(efreq_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(efreq_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(efreq_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- MoTR & Length --------------------------------------------')
mlen_rand <- extract(fit_mlen, "x_rand")[[1]]
# mlen_rand

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 20), type="n",
     xlab = "MoTR value", ylab = "Word Length", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(mlen_rand[,1], mlen_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(mlen_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(mlen_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mlen_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- EyeTR & Length --------------------------------------------')
elen_rand <- extract(fit_elen, "x_rand")[[1]]
# elen_rand

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 20), type="n",
     xlab = "EyeTR value", ylab = "Word Length", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(elen_rand[,1], elen_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(elen_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(elen_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(elen_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- MoTR & Surprisal --------------------------------------------')
msurp_rand <- extract(fit_msurp, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 20), type="n",
     xlab = "MoTR value", ylab = "Word Surprisal", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(msurp_rand [,1], msurp_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(msurp_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(msurp_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(msurp_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- EyeTR & Surprisal --------------------------------------------')
esurp_rand <- extract(fit_esurp, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 20), type="n",
     xlab = "EyeTR value", ylab = "Word Surprisal", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(esurp_rand [,1], esurp_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(esurp_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(esurp_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(esurp_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
print("EyeTR vs. EyeTR Fisrt Pass Regression Prob. < 0.3 ")

ereg_df = provo_eyetr_grouped_df %>% filter(metric == "FPReg") %>% distinct() %>% #group_by(text_id, metric, measure) %>%
  # summarize(value = mean(value)) %>%
  filter(!(row_number() %in% c(443, 444, 445, 446))) %>%
    spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value_1 =  pmax(value_1, 1e-5),
         eyetr_value_2 = pmax(value_2, 1e-5))

ereg_df_low = provo_eyetr_grouped_df %>% filter(metric == "FPReg") %>% distinct() %>% #group_by(text_id, metric, measure) %>%
  # summarize(value = mean(value)) %>%
  filter(!(row_number() %in% c(443, 444, 445, 446))) %>%
    spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value_1 =  pmax(value_1, 1e-5),
         eyetr_value_2 = pmax(value_2, 1e-5)) %>%
  filter(eyetr_value_1 < 0.3)
# View(ereg_df_low)

ereg_df_high = provo_eyetr_grouped_df %>% filter(metric == "FPReg") %>% distinct() %>% #group_by(text_id, metric, measure) %>%
  # summarize(value = mean(value)) %>%
  filter(!(row_number() %in% c(443, 444, 445, 446))) %>%
    spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value_1 =  pmax(value_1, 1e-5),
         eyetr_value_2 = pmax(value_2, 1e-5)) %>%
  filter(eyetr_value_1 >= 0.3)
# View(ereg_df_high) 

print(cor.test(ereg_df$eyetr_value_1, ereg_df$eyetr_value_2)$estimate)
print(cor.test(ereg_df$eyetr_value_1, ereg_df$eyetr_value_2)$p.value)
print(cor.test(ereg_df_low$eyetr_value_1, ereg_df_low$eyetr_value_2)$estimate)
print(cor.test(ereg_df_low$eyetr_value_1, ereg_df_low$eyetr_value_2)$p.value)
print(cor.test(ereg_df_high$eyetr_value_1, ereg_df_high$eyetr_value_2)$estimate)
print(cor.test(ereg_df_high$eyetr_value_1, ereg_df_high$eyetr_value_2)$p.value)

# View(egd_df)

ereg_df %>% 
  gather(measure, value, 5:6) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

ereg_temp <- ereg_df[c("eyetr_value_1", "eyetr_value_2")] %>%
  drop_na() %>%
  data.matrix()
ereg_temp_low <- ereg_df_low[c("eyetr_value_1", "eyetr_value_2")] %>%
  drop_na() %>%
  data.matrix()
ereg_temp_high <- ereg_df_high[c("eyetr_value_1", "eyetr_value_2")] %>%
  drop_na() %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 3))
# Plot the first data matrix gd_temp
plot(ereg_temp, pch = 16, col = "blue",
     main = "FPReg all data Not Log-Transformed")
plot(ereg_temp_low, pch = 16, col = "blue",
     main = "FPReg < 0.3 Not Log-Transformed")
plot(ereg_temp_high, pch = 16, col = "blue",
     main = "FPReg > 0.3 Not Log-Transformed")
# -------fit model eyetr vs. eyetr FPReg <0.3 & >=0.3 ----------
reg_data = list(x=ereg_temp, N=nrow(ereg_temp))
fit_reg = stan(
  # file="stan_models/bivariate_beta_correlation_reg.stan", 
  file = "stan_models/bivariate_normal_reg.stan",
  data=reg_data, 
  iter=4000, 
  chains=4, 
  cores=4,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
fit_reg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_reg, file = paste0("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_FPReg_cor_all_data.rds"))

exploratory: divide eye tracking regression data into two parts.

# fit_ereg_all = readRDS("./eyetr_eyetr_FPReg_cor_all_data.rds")
fit_ereg_all = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_FPReg_cor.rds")
fit_ereg_low = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_FPReg_cor_00-03.rds")
fit_ereg_high = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_FPReg_cor_03-1.rds")

print('---------------------------- First Pass Regression Prob. all data --------------------------------------------')
print(fit_ereg_all)
print('---------------------------- First Pass Regression Prob.< 0.3--------------------------------------------')
print(fit_ereg_low)
print('---------------------------- First Pass Regression Prob.>= 0.3--------------------------------------------')
print(fit_ereg_high)
# # FPReg all data
stan_trace(fit_ereg_all)
stan_dens(fit_ereg_all, separate_chains = TRUE)
stan_plot(fit_ereg_all)

# # FPReg < 0.3
stan_trace(fit_ereg_low)
stan_dens(fit_ereg_low, separate_chains = TRUE)
stan_plot(fit_ereg_low)

# FPReg >= 0.3
stan_trace(fit_ereg_high)
stan_dens(fit_ereg_high, separate_chains = TRUE)
stan_plot(fit_ereg_high)
print('---------------------------- First Pass Regression all data--------------------------------------------')
rho_ereg_all = as.numeric(extract(fit_ereg_all, "rho")[[1]])
mean = mean(rho_ereg_all)
crI = quantile(rho_ereg_all, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_ereg_all), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")


print('---------------------------- First Pass Regression < 0.3--------------------------------------------')
rho_ereg_low = as.numeric(extract(fit_ereg_low, "rho")[[1]])
mean = mean(rho_ereg_low)
crI = quantile(rho_ereg_low, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_ereg_low), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")


print('---------------------------- First Pass Regression >= 0.3--------------------------------------------')
rho_ereg_high = as.numeric(extract(fit_ereg_high, "rho")[[1]])
mean = mean(rho_ereg_high)
crI = quantile(rho_ereg_high, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_ereg_high), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
print('---------------------------- First Pass Regression all data --------------------------------------------')
eallreg_rand <- extract(fit_ereg_all, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(eallreg_rand[,1], eallreg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(ereg_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(eallreg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(eallreg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- First Pass Regression < 0.3 --------------------------------------------')
elowreg_rand <- extract(fit_ereg_low, "x_rand")[[1]]
# print(elowreg_rand)
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(elowreg_rand[,1], elowreg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(ereg_temp_low, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(elowreg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(elowreg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- First Pass Regression >= 0.3 --------------------------------------------')
ehighreg_rand_samples <- extract(fit_ereg_high, "x_rand")[[1]]
# print(mhighreg_rand_samples)
selected_indices <- sample(1:nrow(ehighreg_rand_samples), 900)
ehighreg_rand <- ehighreg_rand_samples[selected_indices, ]
# mhighreg_rand <- extract(fit_mreg_high, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(ehighreg_rand[,1], ehighreg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(ereg_temp_high, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(ehighreg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(ehighreg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
---
title: "Correlation Analysis for MoTR on Provo Data"
output: html_notebook
---

```{r}
shhh <- suppressPackageStartupMessages # It's a library, so shhh!

shhh(library( mgcv ))
shhh(library(dplyr))
shhh(library(ggplot2))
shhh(library(lme4))
shhh(library(tidymv))
shhh(library(gamlss))
shhh(library(gsubfn))
shhh(library(lmerTest))
shhh(library(tidyverse))
shhh(library(boot))
shhh(library(rsample))
shhh(library(plotrix))
shhh(library(ggrepel))
shhh(library(mgcv))

shhh(library(brms))
shhh(library(bayesplot))
shhh(library(patchwork))
shhh(library(MASS))
shhh(library(tidyr))
shhh(library(extraDistr))
shhh(library(purrr))
# For exercises with Stan code
shhh(library(rstan))
options(mc.cores = parallel::detectCores())
rstan_options(auto_write = FALSE)

shhh(library(car))
shhh(library(coda))
shhh(library(gridExtra))

theme_set(theme_bw())
options(digits=4)
options(scipen=999)
set.seed(444)
pipe_message = function(.data, status) {message(status); .data}

```


# Read in MoTR Data

```{r}

rate = 160

file_prefix = "../data/provo_f160/"
fnames = list.files(path=file_prefix)

df = data.frame()
for (f in fnames) {
  temp = read.csv(paste0(file_prefix, "/", f)) %>%
    mutate(subj = str_remove(f, "_reading_measures.csv"))
  df = rbind(df, temp)
}

# Filter out readers whose accuracy to the comprehension questions were less than 80%.
filter_df = df %>%
  group_by(para_nr, subj) %>% summarise(correct = if_else(unique(correctness) == 1, 1, 0)) %>% ungroup() %>%
  drop_na() %>%
  group_by(subj) %>% summarise(p_correct = mean(correct)) %>% ungroup() %>%
  mutate(p_correct = round(p_correct, digits = 2))

filter_df = filter_df %>% filter(p_correct < 0.8)
filter_list = filter_df$subj
pilot_exceptions <- c("reader_255", "reader_256", "reader_259", "reader_261", "reader_262", "reader_263")

raw_df = df %>%
  filter(! subj %in% c(filter_list) | (subj %in% pilot_exceptions)) %>%
  mutate(word = str_trim(word)) %>%
  mutate(subj = str_remove(subj, "reader_")) %>%
  mutate(subj = as.character(subj)) %>%
  mutate(FPReg = if_else(total_duration == 0, -1, FPReg)) %>% #If the word is skipped we can't say that it wasn't regressed on the first pass. Set to a "NA"
  dplyr::select(expr_id, cond_id, para_nr, word, word_nr, first_duration, total_duration, gaze_duration, go_past_time, FPReg, subj)
length(unique(raw_df$subj))

df %>%
  filter(! subj %in% c(filter_list) | (subj %in% pilot_exceptions)) %>%
  filter(FPReg >= 0) %>%
  dplyr::select(FPReg) %>%
  drop_na() %>%
  summarise( m = mean(FPReg))

df %>%
  filter(! subj %in% c(filter_list) | (subj %in% pilot_exceptions)) %>%
  dplyr::select(FPFix) %>%
  drop_na() %>%
  summarise( m = mean(FPFix))


```

```{r}
# View(raw_df)
```


```{r}
# Average across subjects
motr_agg_df = raw_df %>%
  gather(metric, value, 6:10) %>%
    filter(value >= 0) %>% #Removes the "NA" values for FPReg
  
    # ==== Remove skipped words
    # mutate(zero = if_else(metric != "FPReg" & value == 0,T, F)) %>%
    # filter(zero == F) %>%
  
    drop_na() %>%
    group_by(para_nr, word_nr, word, metric) %>%
    
    # === Remove outliers > 3SD
      # mutate(outlier = if_else(metric != "FPReg" & value > (mean(value) + 3 * sd(value)), T, F)) %>% filter(outlier == F) %>%
  
      summarise(value = mean(value), nsubj = length(unique(subj))) %>%
  ungroup() %>%
  arrange(para_nr, word_nr) %>%
  rename(text_id = para_nr, word_text_idx = word_nr, motr_value = value)
# View(motr_agg_df)

```




# Comparison to Provo


```{r}
# Read in Provo surprisal, frequency and length data
provo_modeling_df = read.csv("../data/provo_stats.csv") %>%
  dplyr::select(text_id, sent_id, trigger_idx, word, freq, surp, len) %>%
  rename(word_idx = trigger_idx)

provo_modeling_df

```

```{r}
# Read in Provo eyetracking data

provo_raw_df = read.csv("../data/provo_eyetracking.csv")

```

```{r}

# unique(provo_raw_df$Participant_ID)
# length(unique(provo_raw_df$Participant_ID))

provo_eyetracking_df = provo_raw_df %>%
  dplyr::select(Participant_ID, Text_ID, Sentence_Number, Word_In_Sentence_Number, Word, Word_Number, IA_FIRST_FIX_PROGRESSIVE, IA_FIRST_RUN_DWELL_TIME, IA_DWELL_TIME, IA_REGRESSION_PATH_DURATION, IA_REGRESSION_OUT, IA_SKIP) %>%
  rename( #first_duration = IA_FIRST_FIXATION_DURATION,   
          gaze_duration = IA_FIRST_RUN_DWELL_TIME,
          total_duration = IA_DWELL_TIME,
          go_past_time = IA_REGRESSION_PATH_DURATION,
          subj = Participant_ID,
          text_id = Text_ID,
          sent_id = Sentence_Number,
          word_idx = Word_In_Sentence_Number,
          word_text_idx = Word_Number,   # IA_ID?
          word = Word,      # Word?
          FPReg = IA_REGRESSION_OUT,
          skip = IA_SKIP,
          ff_progressive = IA_FIRST_FIX_PROGRESSIVE) %>%
  mutate(first_duration = gaze_duration) %>%
  mutate(gaze_duration = if_else(ff_progressive == 0, 0, as.double(gaze_duration)),
         go_past_time = if_else(ff_progressive == 0, 0, as.double(go_past_time))) %>%
  dplyr::select(-ff_progressive) %>%
  
  mutate(
    gaze_duration = if_else(total_duration == 0, 0, as.double(gaze_duration)),
      go_past_time = if_else(total_duration == 0, 0, as.double(go_past_time)),
      FPReg = if_else(total_duration == 0, -1, as.double(FPReg)),
      first_duration =  if_else(total_duration == 0, 0, as.double(first_duration)),
  ) %>%
  gather(metric, value, 7:12) %>%
  filter(value >= 0) %>%          # filter skipped word in eye tracking data for FPReg
  
  # ==== Remove skipped words
  # mutate(zero = if_else(metric != "FPReg" & value == 0,T, F)) %>%
  # filter(zero == F) %>%
  
  # mutate(value = if_else(is.na(value), as.integer(0), as.integer(value))) %>%
  # mutate(value = if_else(metric != "FPReg" & is.na(value), as.integer(0), as.integer(value))) %>%
  drop_na() %>%
  mutate(word = str_trim(word)) %>%
  mutate(subj = str_remove(subj, "Sub")) %>%
  mutate(subj = as.integer(subj)) %>%
    group_by(text_id, word_text_idx, sent_id, word_idx, word, metric) %>%
  
  # === Remove outliers > 3SD
    # mutate(outlier = if_else(! metric %in% c("FPReg", "skip") & value > (mean(value) + 3 * sd(value) ), T, F)) %>%
    # filter(outlier == F) %>%
  
  ungroup() #%>%

# Aggregate cross-participant data for all subjects
provo_eyetracking_agg_df = provo_eyetracking_df %>%
  group_by(text_id, word_text_idx, sent_id, word_idx, word, metric) %>%
    summarise(value = mean(value),
              nsubj = length(unique(subj))) %>%
    ungroup()

provo_raw_df %>%
  dplyr::select(IA_REGRESSION_OUT) %>%
  drop_na() %>%
  summarise( m = mean(IA_REGRESSION_OUT))

provo_raw_df %>%
  dplyr::select(IA_SKIP) %>%
  drop_na() %>%
  summarise( m = mean(IA_SKIP))

```

```{r}

# Split the eyetracking data in two by subjects to see how well it correlates with itself
provo_eyetracking_subj1_df_temp = provo_eyetracking_df %>%
  filter(subj <= 42) %>%
  mutate(word_text_idx = as.integer(word_text_idx - 1)) %>%
  group_by(text_id, word_text_idx, sent_id, word_idx, word, metric) %>%
    summarise(value = mean(value)) %>%
  ungroup() %>%
  rename(value_1 = value) #%>%
  # dplyr::select(-sent_id, -word_idx)


provo_eyetracking_subj1_df = merge(provo_eyetracking_subj1_df_temp, motr_agg_df, by=c("text_id", "word_text_idx", "metric")) %>%
  arrange(text_id, sent_id, word_idx) %>%
  filter(!(text_id == 13 & word_text_idx >= 20 & word_text_idx <= 52)) %>%
  filter(!(text_id == 3 & word_text_idx >= 46 & word_text_idx <= 57)) %>%
  rename(word = word.y) %>%
  dplyr::select(text_id, word_text_idx, metric, word, value_1) 

# View(provo_eyetracking_subj1_df)

provo_eyetracking_subj2_df = provo_eyetracking_df %>%
  filter(subj > 42) %>%
  mutate(word_text_idx = as.integer(word_text_idx - 1)) %>%
  group_by(text_id, word_text_idx, sent_id, word_idx, word, metric) %>%
    summarise(value = mean(value)) %>%
  ungroup() %>%
    rename(value_2 = value)%>%
  dplyr::select(-sent_id, -word_idx)

# View(provo_eyetracking_subj2_df)
  
provo_eyetr_grouped_df = merge(provo_eyetracking_subj2_df, provo_eyetracking_subj1_df, by=c("text_id", "word_text_idx", "metric")) %>%
  # filter(word.x == word.y) %>%
  dplyr::select(-word.y) %>%
  # === Remove outliers > 3SD
  # group_by(metric) %>%
  #   mutate(motr_outlier = if_else(! metric %in% c("FPReg", "skip") & value_1 > (mean(value_1) + 3 * sd(value_1) ), T, F)) %>%
  #   filter(motr_outlier == F) %>%
  #   mutate(eyetr_outlier = if_else(! metric %in% c("FPReg", "skip") & value_2 > (mean(value_2) + 3 * sd(value_2) ), T, F)) %>%
  #   filter(eyetr_outlier == F) %>%
  # ungroup() %>%
  
  gather(measure, value, c("value_1", "value_2")) #%>%
  # dplyr::select(-motr_outlier, -eyetr_outlier)

# View(provo_eyetr_grouped_df)

```


```{r}
provo_df = merge(provo_eyetracking_agg_df, provo_modeling_df, by=c("text_id", "sent_id", "word_idx")) %>%
  mutate(word_text_idx = as.integer(word_text_idx - 1)) %>%
  arrange(text_id, sent_id, word_idx) %>%
  rename(eyetr_value = value) 

provo_df = merge(provo_df, motr_agg_df, by=c("text_id", "word_text_idx", "metric")) %>%
arrange(text_id, sent_id, word_idx) %>%
  # almost all the word.x != word.y is because of normalization problem, so we can keep them, instead, deleting some special cases
filter(!(text_id == 13 & word_text_idx >= 20 & word_text_idx <= 52)) %>%
  filter(!(text_id == 3 & word_text_idx >= 46 & word_text_idx <= 57)) %>%
# filter(word.x == word) #%>%
dplyr::select(-word.x, -word.y) %>%
  
# === Remove outliers > 3SD
# group_by(metric) %>%
#   mutate(motr_outlier = if_else(! metric %in% c("FPReg", "skip") & motr_value > (mean(motr_value) + 3 * sd(motr_value) ), T, F)) %>%
#   filter(motr_outlier == F) %>%
#   mutate(eyetr_outlier = if_else(! metric %in% c("FPReg", "skip") & eyetr_value > (mean(eyetr_value) + 3 * sd(eyetr_value) ), T, F)) %>%
#   filter(eyetr_outlier == F) %>%
# ungroup() %>%
  
gather(measure, value, c("eyetr_value", "motr_value")) #%>%
# dplyr::select(-motr_outlier, -eyetr_outlier)
  
# provo_df
```


# Bayesian -- use Stan -- motr & eyetr correlation
```{r}
print("Gaze Duration")
gd_df = provo_df %>% filter(metric == "gaze_duration") %>% 
  spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value =  pmax(eyetr_value, 1),
         motr_value = pmax(motr_value, 1)
  ) %>%
  mutate(eyetr_value_log = log(eyetr_value),
         motr_value_log = log(motr_value))
print(cor.test(gd_df$eyetr_value, gd_df$motr_value)$estimate)
print(cor.test(gd_df$eyetr_value_log, gd_df$motr_value_log)$estimate)
# View(gd_df)
```


```{r}
gd_df %>% 
  gather(measure, value, 12:15) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")
  
```



```{r}
# center data around 0.

gd_temp <- gd_df[c("eyetr_value", "motr_value")] %>%
   # mutate(eyetr_value = eyetr_value - mean(eyetr_value),
   #      motr_value = motr_value - mean(motr_value)) %>%
  data.matrix()

gd_temp_log <- gd_df[c("eyetr_value_log", "motr_value_log")] %>%
 mutate(eyetr_value_log = eyetr_value_log - mean(eyetr_value_log), 
        motr_value_log = motr_value_log - mean(motr_value_log)) %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix gd_temp
plot(gd_temp, pch = 16, col = "blue",
     main = "Not Log-Transformed")
# Plot the second data matrix gd_temp_log
plot(gd_temp_log, pch = 16, col = "red",
     main = "Centered Log-Transformed")

```



```{r, eval=FALSE}
gd_data = list(x=gd_temp, N=nrow(gd_temp))

fit_gd = stan(
  file="stan_models/bivariate_correlation.stan", 
  data=gd_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
fit_gd@stanmodel@dso <- new("cxxdso")
saveRDS(fit_gd, file = paste0("./bayesian_models/bayesian_models_correlation/motr_eyetr_gaze_duration_cor_drop0s.rds"))

```

```{r}
print("Go Past Time")
gpt_df = provo_df %>% filter(metric == "go_past_time") %>% 
  spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value =  pmax(eyetr_value, 1),
         motr_value = pmax(motr_value, 1)
  ) %>%
  mutate(eyetr_value_log = log(eyetr_value),
         motr_value_log = log(motr_value))
print(cor.test(gpt_df$eyetr_value, gpt_df$motr_value)$estimate)
print(cor.test(gpt_df$eyetr_value_log, gpt_df$motr_value_log)$estimate)

gpt_df %>% 
  gather(measure, value, 12:15) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

gpt_temp <- gpt_df[c("eyetr_value", "motr_value")] %>% data.matrix()

gpt_temp_log <- gpt_df[c("eyetr_value_log", "motr_value_log")] %>%
 mutate(eyetr_value_log = eyetr_value_log - mean(eyetr_value_log), 
        motr_value_log = motr_value_log - mean(motr_value_log)) %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix gpt_temp
plot(gpt_temp, pch = 16, col = "blue",
     main = "Not Log-Transformed")
# Plot the second data matrix gpt_temp_log
plot(gpt_temp_log, pch = 16, col = "red",
     main = "Centered Log-Transformed")
```

```{r, eval=FALSE}
# -------fit model go past time ----------
gpt_data = list(x=gpt_temp, N=nrow(gpt_temp))
fit_gpt = stan(
  file="stan_models/bivariate_correlation.stan", 
  data=gpt_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
fit_gpt@stanmodel@dso <- new("cxxdso")
saveRDS(fit_gpt, file = paste0("./bayesian_models/bayesian_models_correlation/motr_eyetr_go_past_time_cor_drop0s.rds"))
```


```{r}
print("Total Duration")
td_df = provo_df %>% filter(metric == "total_duration") %>% 
  spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value =  pmax(eyetr_value, 1),
         motr_value = pmax(motr_value, 1)
  ) %>%
  mutate(eyetr_value_log = log(eyetr_value),
         motr_value_log = log(motr_value))
print(cor.test(td_df$eyetr_value, td_df$motr_value)$estimate)
print(cor.test(td_df$eyetr_value_log, td_df$motr_value_log)$estimate)

td_df %>% 
  gather(measure, value, 12:15) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

td_temp <- td_df[c("eyetr_value", "motr_value")] %>% data.matrix()

td_temp_log <- td_df[c("eyetr_value_log", "motr_value_log")] %>%
 mutate(eyetr_value_log = eyetr_value_log - mean(eyetr_value_log), 
        motr_value_log = motr_value_log - mean(motr_value_log)) %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix td_temp
plot(td_temp, pch = 16, col = "blue",
     main = "Not Log-Transformed")
# Plot the second data matrix td_temp_log
plot(td_temp_log, pch = 16, col = "red",
     main = "Centered Log-Transformed")
```

```{r, eval=FALSE}
# -------fit model total duration ----------
td_data = list(x=td_temp, N=nrow(td_temp))
fit_td = stan(
  file="stan_models/bivariate_correlation.stan", 
  data=td_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
fit_td@stanmodel@dso <- new("cxxdso")
saveRDS(fit_td, file = paste0("./bayesian_models/bayesian_models_correlation/motr_eyetr_total_duration_cor_drop0s.rds"))
```

```{r}
print("First Pass Regression Prob.")
reg_df = provo_df %>% filter(metric == "FPReg") %>% 
  spread(measure, value) %>%
  filter(eyetr_value > 0, motr_value > 0) %>%
  mutate(eyetr_value =  pmax(eyetr_value, 1e-5),
         motr_value = pmax(motr_value, 1e-5))
print(cor.test(reg_df$eyetr_value, reg_df$motr_value)$estimate)

# View(reg_df)
reg_df %>% 
  gather(measure, value, 12:13) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

reg_temp <- reg_df[c("eyetr_value", "motr_value")] %>% data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix td_temp
plot(reg_temp, pch = 16, col = "blue",
     main = "Not Log-Transformed")
```

```{r, eval=FALSE}
# -------fit model FPReg ----------
reg_data = list(x=reg_temp, N=nrow(reg_temp))
fit_reg = stan(
  file="stan_models/bivariate_normal_reg.stan", 
  data=reg_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
fit_reg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_reg, file = paste0("./bayesian_models/bayesian_models_correlation/motr_eyetr_FPReg_cor_drop0s.rds"))
```

# rank transformation for FPReg
```{r, eval=FALSE}
reg_df = provo_df %>% filter(metric == "FPReg") %>% 
  spread(measure, value) %>%
  mutate(
    eyetr_value_rank = ifelse(eyetr_value > 0, rank(eyetr_value), NA),
    motr_value_rank = ifelse(motr_value > 0, rank(motr_value), NA)
  ) %>%
  drop_na()
#   mutate(
#   eyetr_value_rank = rank(eyetr_value, ties.method = "average"),
#   motr_value_rank = rank(motr_value, ties.method = "average")
# )
# View(reg_df)

print(cor.test(reg_df$eyetr_value_rank, reg_df$motr_value_rank)$estimate)
print(cor.test(reg_df$eyetr_value_rank, reg_df$motr_value_rank, method = "kendall"))


reg_df %>% 
  gather(measure, value, 14:15) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

reg_temp <- reg_df[c("eyetr_value_rank", "motr_value_rank")] %>% data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix td_temp
plot(reg_temp, pch = 16, col = "blue",
     main = "Rank Transformed")
  
```

```{r, eval=FALSE}
# -------fit model FPReg RANK----------
reg_data = list(x=reg_temp, N=nrow(reg_temp))
fit_reg = stan(
  file="stan_models/bivariate_correlation_rank.stan", 
  data=reg_data, 
  iter=4000, 
  chains=4, 
  cores=4,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
fit_reg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_reg, file = paste0("./bayesian_models/bayesian_models_correlation/motr_eyetr_FPReg_cor_rank_drop0s.rds"))
```


```{r}
fit_gd = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_gaze_duration_cor_drop0s.rds")
fit_gpt = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_go_past_time_cor_drop0s.rds")
fit_td = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_total_duration_cor_drop0s.rds")
fit_reg = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_FPReg_cor_drop0s.rds")
fit_reg_rank = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_FPReg_cor_rank_drop0s.rds")

# models for drop 0s
# fit_gd = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_gaze_duration_cor_drop0s.rds")
# fit_gpt = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_go_past_time_cor_drop0s.rds")
# fit_td = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_total_duration_cor_drop0s.rds")
# fit_reg = readRDS("./bayesian_models/bayesian_models_correlation/ranked_motr_eyetr_FPReg_cor.rds")

print('---------------------------- Gaze Duration--------------------------------------------')
print(fit_gd)
print('---------------------------- Go Past Time--------------------------------------------')
print(fit_gpt)
print('---------------------------- Total Duration--------------------------------------------')
print(fit_td)
print('---------------------------- First Pass Regression Prob.--------------------------------------------')
print(fit_reg)
print('---------------------------- First Pass Regression Prob. RANK--------------------------------------------')
print(fit_reg_rank)

```

```{r, eval=FALSE}
# stan_trace(fit_gd, pars=c("rho", "mu", "sigma", "nu"))
# stan_dens(fit_gd, pars=c("rho", "mu", "sigma", "nu"), separate_chains = TRUE)
# stan_plot(fit_gd, pars=c("rho", "mu", "sigma", "nu"))

# Gaze Duration
stan_trace(fit_gd)
stan_dens(fit_gd, separate_chains = TRUE)
stan_plot(fit_gd)

# Go Past Time
stan_trace(fit_gpt)
stan_dens(fit_gpt, separate_chains = TRUE)
stan_plot(fit_gpt)

# Total Duration
stan_trace(fit_td)
stan_dens(fit_td, separate_chains = TRUE)
stan_plot(fit_td)

# FPReg
stan_trace(fit_reg)
stan_dens(fit_reg, separate_chains = TRUE)
stan_plot(fit_reg)

```

```{r, fig.width=5, fig.height=10, eval=FALSE}
p1 <- stan_trace(fit_gd, pars = 'rho', inc_warmup = FALSE)
p2 <- stan_dens(fit_gd, pars = 'rho', separate_chains = TRUE)
p3 <- stan_trace(fit_gd, pars = 'mu[1]', inc_warmup = FALSE)
p4 <- stan_dens(fit_gd, pars = 'mu[1]', separate_chains = TRUE)
p5 <- stan_trace(fit_gd, pars = 'mu[2]', inc_warmup = FALSE)
p6 <- stan_dens(fit_gd, pars = 'mu[2]', separate_chains = TRUE)
p7 <- stan_trace(fit_gd, pars = 'sigma[1]', inc_warmup = FALSE)
p8 <- stan_dens(fit_gd, pars = 'sigma[1]', separate_chains = TRUE)
p9 <- stan_trace(fit_gd, pars = 'sigma[2]', inc_warmup = FALSE)
p10 <- stan_dens(fit_gd, pars = 'sigma[2]', separate_chains = TRUE)
p11 <- stan_trace(fit_gd, pars = 'nu', inc_warmup = FALSE)
p12 <- stan_dens(fit_gd, pars = 'nu', separate_chains = TRUE)


# Use grid.arrange() to arrange the plots
# grid.arrange(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, ncol=2, nrow=6)

```



```{r}
print('---------------------------- Gaze Duration--------------------------------------------')
rho_gd = as.numeric(extract(fit_gd, "rho")[[1]])
mean = mean(rho_gd)
crI = quantile(rho_gd, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_gd), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- Go Past Time--------------------------------------------')
rho_gpt = as.numeric(extract(fit_gpt, "rho")[[1]])
mean = mean(rho_gpt)
crI = quantile(rho_gpt, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_gpt), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- Total Duration--------------------------------------------')
rho_td = as.numeric(extract(fit_td, "rho")[[1]])
mean = mean(rho_td)
crI = quantile(rho_td, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_td), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- First Pass Regression --------------------------------------------')
# rho_reg = as.numeric(extract(fit_reg, "rho[1, 2]")[[1]])
rho_reg = as.numeric(extract(fit_reg, "rho")[[1]])
mean = mean(rho_reg)
crI = quantile(rho_reg, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_reg), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- First Pass Regression RANK--------------------------------------------')
# rho_reg = as.numeric(extract(fit_reg, "rho[1, 2]")[[1]])
rho_reg = as.numeric(extract(fit_reg_rank, "rho")[[1]])
mean = mean(rho_reg)
crI = quantile(rho_reg, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_reg), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]")
```


```{r, eval=FALSE}
print('---------------------------- Gaze Duration--------------------------------------------')
gd_rand <- extract(fit_gd, "x_rand")[[1]]
# x_rand_filtered <- x_rand[apply(x_rand, 1, function(x) all(x > 0)),]
# x_rand_filtered

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 400), ylim=c(0, 700), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(gd_rand[,1], gd_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(gd_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(gd_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(gd_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- Go Past Time--------------------------------------------')
gpt_rand <- extract(fit_gpt, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 1200), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "Go Past Time") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(gpt_rand[,1], gpt_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(gpt_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(gpt_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(gpt_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- Total Duration--------------------------------------------')
td_rand <- extract(fit_td, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 1200), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "Total Duration") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(td_rand[,1], td_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(td_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(td_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(td_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- First Pass Regression --------------------------------------------')
reg_rand <- extract(fit_reg, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(reg_rand[,1], reg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(reg_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(reg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(reg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")


```

# model motr eyetr FPReg correlation (eyetr < 0.3)
```{r}
print("First Pass Regression Prob. all and  < 0.3")
reg_df_all = provo_df %>% filter(metric == "FPReg") %>% 
  spread(measure, value) %>%
  # filter(eyetr_value > 0, motr_value > 0) %>%
  mutate(eyetr_value =  pmax(eyetr_value, 1e-5),
         motr_value = pmax(motr_value, 1e-5))

reg_df_low_drop0 = provo_df %>% filter(metric == "FPReg") %>% 
  spread(measure, value) %>%
  filter(eyetr_value > 0, motr_value > 0) %>%
  mutate(eyetr_value =  pmax(eyetr_value, 1e-5),
         motr_value = pmax(motr_value, 1e-5)) %>%
  filter(eyetr_value < 0.3)

reg_df_low = provo_df %>% filter(metric == "FPReg") %>% 
  spread(measure, value) %>%
  # filter(eyetr_value > 0, motr_value > 0) %>%
  mutate(eyetr_value =  pmax(eyetr_value, 1e-5),
         motr_value = pmax(motr_value, 1e-5)) %>%
  filter(eyetr_value < 0.3)
  # mutate(eyetr_value = exp(eyetr_value),
         # motr_value = exp(motr_value)
         # )
# View(reg_df)

print(cor.test(reg_df_all$eyetr_value, reg_df_all$motr_value)$estimate)
print(cor.test(reg_df_all$eyetr_value, reg_df_all$motr_value)$p.value)
print(cor.test(reg_df_low$eyetr_value, reg_df_low$motr_value)$estimate)
print(cor.test(reg_df_low$eyetr_value, reg_df_low$motr_value)$p.value)
print(cor.test(reg_df_low_drop0$eyetr_value, reg_df_low_drop0$motr_value)$estimate)
print(cor.test(reg_df_low_drop0$eyetr_value, reg_df_low_drop0$motr_value)$p.value)

# View(reg_df)
reg_df_low %>% 
  gather(measure, value, 12:13) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

reg_temp_all <- reg_df_all[c("eyetr_value", "motr_value")] %>% data.matrix()
reg_temp_low <- reg_df_low[c("eyetr_value", "motr_value")] %>% data.matrix()
reg_temp_low_drop0 <- reg_df_low_drop0[c("eyetr_value", "motr_value")] %>% data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix td_temp
plot(reg_temp_low, pch = 16, col = "blue",
     main = "Not Log-Transformed")
```

```{r, eval=FALSE}
# -------fit model FPReg < 0.3 ----------
reg_data = list(x=reg_temp_all, N=nrow(reg_temp_all))
fit_reg = stan(
  # file="stan_models/bivariate_beta_correlation_reg.stan", 
  file = "stan_models/bivariate_normal_reg.stan",
  data=reg_data, 
  iter=4000, 
  chains=4, 
  cores=4,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
fit_reg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_reg, file = paste0("./bayesian_models/bayesian_models_correlation/motr_eyetr_FPReg_cor_all_data_drop0s.rds"))
```


# model motr eyetr FPReg correlation (eyetr >= 0.3)
```{r}
print("First Pass Regression Prob. >= 0.3")
reg_df_high_drop0 = provo_df %>% filter(metric == "FPReg") %>% 
  spread(measure, value) %>%
  filter(eyetr_value > 0, motr_value > 0) %>%
  mutate(eyetr_value =  pmax(eyetr_value, 1e-5),
         motr_value = pmax(motr_value, 1e-5)) %>%
  filter(eyetr_value >= 0.3)

reg_df_high = provo_df %>% filter(metric == "FPReg") %>% 
  spread(measure, value) %>%
  # filter(eyetr_value > 0, motr_value > 0) %>%
  mutate(eyetr_value =  pmax(eyetr_value, 1e-5),
         motr_value = pmax(motr_value, 1e-5)) %>%
  filter(eyetr_value >= 0.3)
  # mutate(eyetr_value = exp(eyetr_value),
         # motr_value = exp(motr_value)
         # )
# View(reg_df)

print(cor.test(reg_df_high$eyetr_value, reg_df_high$motr_value)$estimate)
print(cor.test(reg_df_high$eyetr_value, reg_df_high$motr_value)$p.value)
print(cor.test(reg_df_high_drop0$eyetr_value, reg_df_high_drop0$motr_value)$estimate)
print(cor.test(reg_df_high_drop0$eyetr_value, reg_df_high_drop0$motr_value)$p.value)

# View(reg_df)
reg_df_high %>% 
  gather(measure, value, 12:13) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

reg_temp_high <- reg_df_high[c("eyetr_value", "motr_value")] %>% data.matrix()
reg_temp_high_drop0 <- reg_df_high_drop0[c("eyetr_value", "motr_value")] %>% data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 3))
# Plot the first data matrix td_temp
plot(reg_temp_all, pch = 16, col = "blue",
     main = "FPReg not logged all data")
plot(reg_temp_low, pch = 16, col = "blue",
     main = "FPReg not logged eyetr < 0.3 ")
plot(reg_temp_high, pch = 16, col = "blue",
     main = "FPReg not logged eyetr >= 0.3")
```

```{r, eval=FALSE}
# -------fit model FPReg >= 0.3 ----------
reg_data = list(x=reg_temp_high_drop0, N=nrow(reg_temp_high_drop0))
fit_reg = stan(
  # file="stan_models/bivariate_beta_correlation_reg.stan", 
  file = "stan_models/bivariate_normal_reg.stan",
  data=reg_data, 
  iter=4000, 
  chains=4, 
  cores=4,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
fit_reg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_reg, file = paste0(".//bayesian_models/bayesian_models_correlation/motr_eyetr_FPReg_cor_03-1_drop0s.rds"))
```



```{r}
fit_mreg_all = readRDS("./bayesian_models/bayesian_models_correlation_2023/motr_eyetr_FPReg_cor_all_data.rds")
fit_mreg_all_drop0 = readRDS("./bayesian_models/bayesian_models_correlation_2023/motr_eyetr_FPReg_cor_all_data_drop0s.rds")
fit_mreg_low = readRDS("./bayesian_models/bayesian_models_correlation_2023/motr_eyetr_FPReg_cor_00-03.rds")
fit_mreg_low_drop0 = readRDS("./bayesian_models/bayesian_models_correlation_2023/motr_eyetr_FPReg_cor_00-03_drop0s.rds")
fit_mreg_high = readRDS("./bayesian_models/bayesian_models_correlation_2023/motr_eyetr_FPReg_cor_03-1.rds")
fit_mreg_high_drop0 = readRDS("./bayesian_models/bayesian_models_correlation_2023/motr_eyetr_FPReg_cor_03-1_drop0s.rds")

print('---------------------------- First Pass Regression Prob. all data --------------------------------------------')
print(fit_mreg_all)
print('---------------------------- First Pass Regression Prob. all data no 0s--------------------------------------------')
print(fit_mreg_all_drop0)
print('---------------------------- First Pass Regression Prob.< 0.3--------------------------------------------')
print(fit_mreg_low)
print('---------------------------- First Pass Regression Prob.< 0.3 no 0s--------------------------------------------')
print(fit_mreg_low_drop0)
print('---------------------------- First Pass Regression Prob.>= 0.3--------------------------------------------')
print(fit_mreg_high)
print('---------------------------- First Pass Regression Prob.>= 0.3 no 0s--------------------------------------------')
print(fit_mreg_high_drop0)
```

```{r}
# # FPReg all data
# stan_trace(fit_mreg_all)
# stan_dens(fit_mreg_all, separate_chains = TRUE)
# stan_plot(fit_mreg_all)

# stan_trace(fit_mreg_all_drop0)
# stan_dens(fit_mreg_all_drop0, separate_chains = TRUE)
# stan_plot(fit_mreg_all_drop0)

# # FPReg < 0.3
# stan_trace(fit_mreg_low)
# stan_dens(fit_mreg_low, separate_chains = TRUE)
# stan_plot(fit_mreg_low)
# 
# stan_trace(fit_mreg_low_drop0)
# stan_dens(fit_mreg_low_drop0, separate_chains = TRUE)
# stan_plot(fit_mreg_low_drop0)

# FPReg > 0.3
stan_trace(fit_mreg_high)
stan_dens(fit_mreg_high, separate_chains = TRUE)
stan_plot(fit_mreg_high)

stan_trace(fit_mreg_high_drop0)
stan_dens(fit_mreg_high_drop0, separate_chains = TRUE)
stan_plot(fit_mreg_high_drop0)
```

```{r}
print('---------------------------- First Pass Regression all data--------------------------------------------')
rho_mreg_all = as.numeric(extract(fit_mreg_all, "rho")[[1]])
mean = mean(rho_mreg_all)
crI = quantile(rho_mreg_all, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mreg_all), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- First Pass Regression all data no 0s--------------------------------------------')
rho_mreg_all_drop0 = as.numeric(extract(fit_mreg_all_drop0, "rho")[[1]])
mean = mean(rho_mreg_all_drop0)
crI = quantile(rho_mreg_all_drop0, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mreg_all_drop0), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- First Pass Regression < 0.3--------------------------------------------')
rho_mreg_low = as.numeric(extract(fit_mreg_low, "rho")[[1]])
mean = mean(rho_mreg_low)
crI = quantile(rho_mreg_low, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mreg_low), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- First Pass Regression < 0.3 no 0s--------------------------------------------')
rho_mreg_low_drop0 = as.numeric(extract(fit_mreg_low_drop0, "rho")[[1]])
mean = mean(rho_mreg_low_drop0)
crI = quantile(rho_mreg_low_drop0, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mreg_low_drop0), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- First Pass Regression >= 0.3--------------------------------------------')
rho_mreg_high = as.numeric(extract(fit_mreg_high, "rho")[[1]])
mean = mean(rho_mreg_high)
crI = quantile(rho_mreg_high, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mreg_high), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- First Pass Regression >= 0.3 no 0s--------------------------------------------')
rho_mreg_high_drop0 = as.numeric(extract(fit_mreg_high_drop0, "rho")[[1]])
mean = mean(rho_mreg_high_drop0)
crI = quantile(rho_mreg_high_drop0, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mreg_high_drop0), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
```

```{r, eval=FALSE}
print('---------------------------- First Pass Regression all data --------------------------------------------')
mallreg_rand <- extract(fit_mreg_all, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(mallreg_rand[,1], mallreg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(reg_temp_all, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(mallreg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mallreg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- First Pass Regression all data no 0s--------------------------------------------')
mallreg_rand_drop0 <- extract(fit_mreg_all_drop0, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(mallreg_rand_drop0[,1], mallreg_rand_drop0[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(reg_temp_all, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(mallreg_rand_drop0, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mallreg_rand_drop0, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- First Pass Regression < 0.3 --------------------------------------------')
mlowreg_rand <- extract(fit_mreg_low, "x_rand")[[1]]
print(mlowreg_rand)
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(mlowreg_rand[,1], mlowreg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(reg_temp_low, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(mlowreg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mlowreg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- First Pass Regression < 0.3 no 0s --------------------------------------------')
mlowreg_rand_drop0 <- extract(fit_mreg_low_drop0, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(mlowreg_rand_drop0[,1], mlowreg_rand_drop0[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(reg_temp_low_drop0, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(mlowreg_rand_drop0, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mlowreg_rand_drop0, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- First Pass Regression >= 0.3 --------------------------------------------')
mhighreg_rand_samples <- extract(fit_mreg_high, "x_rand")[[1]]
# print(mhighreg_rand_samples)
selected_indices <- sample(1:nrow(mhighreg_rand_samples), 900)
mhighreg_rand <- mhighreg_rand_samples[selected_indices, ]
# mhighreg_rand <- extract(fit_mreg_high, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(mhighreg_rand[,1], mhighreg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(reg_temp_high, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(mhighreg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mhighreg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

# print('---------------------------- First Pass Regression >= 0.3 no 0s --------------------------------------------')
mhighreg_rand_drop0_samples <- extract(fit_mreg_high_drop0, "x_rand")[[1]]
selected_indices <- sample(1:nrow(mhighreg_rand_drop0_samples), 900)
mhighreg_rand_drop0 <- mhighreg_rand_drop0_samples[selected_indices, ]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color
points(mhighreg_rand_drop0[,1], mhighreg_rand_drop0[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(reg_temp_high_drop0, pch=16, col="red")

# add dataEllipse with color
dataEllipse(mhighreg_rand_drop0, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mhighreg_rand_drop0, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

```


# model eye tracking to eye tracking correlation
```{r}

print("Gaze Duration")
# View(provo_eyetr_grouped_df)

egd_df = provo_eyetr_grouped_df %>% filter(metric == "gaze_duration") %>% #group_by(text_id, metric, measure) %>%
  # summarize(value = mean(value, na.rm = TRUE), .groups = 'drop') %>%
    filter(text_id != 18) %>%
    spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value_1 =  pmax(value_1, 1),
         eyetr_value_2 = pmax(value_2, 1)
  ) 
print(cor.test(egd_df$eyetr_value_1, egd_df$eyetr_value_2)$estimate)

# View(egd_df)

egd_df %>% 
  gather(measure, value, 5:6) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

egd_temp <- egd_df[c("eyetr_value_1", "eyetr_value_2")] %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix gd_temp
plot(egd_temp, pch = 16, col = "blue",
     main = "Not Log-Transformed")

```

```{r, eval=FALSE}
egd_data = list(x=egd_temp, N=nrow(egd_temp))

fit_egd = stan(
  file="stan_models/bivariate_correlation.stan", 
  data=egd_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
fit_egd@stanmodel@dso <- new("cxxdso")
saveRDS(fit_egd, file = paste0("./bayesian_models/bayesian_models_correlation/cor_bivariate/eyetr_eyetr_gaze_duration_cor.rds"))

```

```{r}
print("Go Past Time")

egpt_df = provo_eyetr_grouped_df %>% filter(metric == "go_past_time") %>% #group_by(text_id, metric, measure) %>%
  # summarize(value = mean(value, na.rm = TRUE), .groups = 'drop') %>%
  filter(text_id != 18) %>%
    spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value_1 =  pmax(value_1, 1),
         eyetr_value_2 = pmax(value_2, 1)
  ) 
print(cor.test(egpt_df$eyetr_value_1, egpt_df$eyetr_value_2)$estimate)

# View(egd_df)

egpt_df %>% 
  gather(measure, value, 5:6) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

egpt_temp <- egpt_df[c("eyetr_value_1", "eyetr_value_2")] %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix gd_temp
plot(egpt_temp, pch = 16, col = "blue",
     main = "Not Log-Transformed")
```

```{r, eval=FALSE}
egpt_data = list(x=egpt_temp, N=nrow(egpt_temp))

fit_egpt = stan(
  file="stan_models/bivariate_correlation.stan", 
  data=egpt_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
fit_egpt@stanmodel@dso <- new("cxxdso")
saveRDS(fit_egpt, file = paste0("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_go_past_time_cor.rds"))

```


```{r}
print("Total Duration")

etd_df = provo_eyetr_grouped_df %>% filter(metric == "total_duration") %>% #group_by(text_id, metric, measure) %>%
  # summarize(value = mean(value, na.rm = TRUE), .groups = 'drop') %>%
  filter(text_id != 18) %>%
    spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value_1 =  pmax(value_1, 1),
         eyetr_value_2 = pmax(value_2, 1)
  ) 
print(cor.test(etd_df$eyetr_value_1, etd_df$eyetr_value_2)$estimate)

# View(egd_df)

etd_df %>% 
  gather(measure, value, 5:6) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

etd_temp <- etd_df[c("eyetr_value_1", "eyetr_value_2")] %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix gd_temp
plot(etd_temp, pch = 16, col = "blue",
     main = "Total Duration Not Log-Transformed")
```


```{r, eval=FALSE}
etd_data = list(x=etd_temp, N=nrow(etd_temp))

fit_etd = stan(
  file="stan_models/bivariate_correlation.stan", 
  data=etd_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  # verbose = FALSE
  )

# Save the model 
fit_etd@stanmodel@dso <- new("cxxdso")
saveRDS(fit_etd, file = paste0("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_total_duration_cor.rds"))
```


```{r}
print("Fisrt Pass Regression Prob.")

ereg_df = provo_eyetr_grouped_df %>% filter(metric == "FPReg") %>% distinct() %>% #group_by(text_id, metric, measure) %>%
  # summarize(value = mean(value)) %>%
  # filter(!(row_number() %in% c(443, 444, 445, 446))) %>%
  filter(text_id != 18) %>%
    spread(measure, value) %>%
  
  # ==== for normal data drop0s ====
  # filter(value_1 > 0, value_2 > 0) %>%
  # mutate(eyetr_value_1 =  pmax(value_1, 1e-5),
  #        eyetr_value_2 = pmax(value_2, 1e-5)
  # )
  
  # ==== for ranking the data drop0s ====
    mutate(
    eyetr_value_1 = ifelse(value_1 > 0, rank(value_1), NA),
    eyetr_value_2 = ifelse(value_2 > 0, rank(value_2), NA)
  ) %>%
  drop_na()

  # ==== for ranking the data w/ 0s ====
  #   mutate(
  #   eyetr_value_1 = rank(value_1, ties.method = "average"),
  #   eyetr_value_2 = rank(value_2, ties.method = "average")
  # )


print(cor.test(ereg_df$eyetr_value_1, ereg_df$eyetr_value_2)$estimate)

# View(egd_df)

ereg_df %>% 
  gather(measure, value, 5:6) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

ereg_temp <- ereg_df[c("eyetr_value_1", "eyetr_value_2")] %>%
  drop_na() %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix gd_temp
plot(ereg_temp, pch = 16, col = "blue",
     main = "FPReg Not Log-Transformed")
```

```{r, eval=FALSE}
# -------fit model FPReg ----------

# View(ereg_temp)
ereg_data = list(x=ereg_temp, N=nrow(ereg_temp))
fit_ereg = stan(
  file="stan_models/bivariate_correlation_rank.stan", 
  data=ereg_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99),
  verbose = FALSE
  )

# Save the model 
fit_ereg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_ereg, file = paste0("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_FPReg_cor_rank_drop0s.rds"))
```


```{r}
fit_egd = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_gaze_duration_cor_drop0s.rds")
fit_egpt = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_go_past_time_cor_drop0s.rds")
fit_etd = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_total_duration_cor_drop0s.rds")
fit_ereg = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_FPReg_cor_drop0s.rds")
print('---------------------------- Gaze Duration--------------------------------------------')
print(fit_egd)
print('---------------------------- Go Past Time--------------------------------------------')
print(fit_egpt)
print('---------------------------- Total Duration--------------------------------------------')
print(fit_etd)
print('---------------------------- First Pass Regression Prob.--------------------------------------------')
print(fit_ereg)
```


```{r, eval=FALSE}
# stan_trace(fit_egd, pars=c("rho", "mu", "sigma", "nu"))
# stan_dens(fit_egd, pars=c("rho", "mu", "sigma", "nu"), separate_chains = TRUE)
# stan_plot(fit_egd, pars=c("rho", "mu", "sigma", "nu"))

# Gaze Duration
stan_trace(fit_egd)
stan_dens(fit_egd, separate_chains = TRUE)
stan_plot(fit_egd)

# Go Past Time
stan_trace(fit_egpt)
stan_dens(fit_egpt, separate_chains = TRUE)
stan_plot(fit_egpt)

# Total Duration
stan_trace(fit_etd)
stan_dens(fit_etd, separate_chains = TRUE)
stan_plot(fit_etd)

# FPReg
stan_trace(fit_ereg)
stan_dens(fit_ereg, separate_chains = TRUE)
stan_plot(fit_ereg)
```


```{r}
print('---------------------------- Gaze Duration--------------------------------------------')
rho_egd = as.numeric(extract(fit_egd, "rho")[[1]])
mean = mean(rho_egd)
crI = quantile(rho_egd, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_egd), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- Go Past Time--------------------------------------------')
rho_egpt = as.numeric(extract(fit_egpt, "rho")[[1]])
mean = mean(rho_egpt)
crI = quantile(rho_egpt, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_egpt), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- Total Duration--------------------------------------------')
rho_etd = as.numeric(extract(fit_etd, "rho")[[1]])
mean = mean(rho_etd)
crI = quantile(rho_etd, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_etd), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- First Pass Regression --------------------------------------------')
rho_ereg = as.numeric(extract(fit_ereg, "rho")[[1]])
mean = mean(rho_ereg)
crI = quantile(rho_ereg, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_ereg), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]")
```


```{r, eval=FALSE}
print('---------------------------- Gaze Duration--------------------------------------------')
egd_rand <- extract(fit_egd, "x_rand")[[1]]
# x_rand_filtered <- x_rand[apply(x_rand, 1, function(x) all(x > 0)),]
# x_rand_filtered

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 400), ylim=c(0, 700), type="n",
     xlab = "Eye tracking value 1", ylab = "Eye tracking value 2", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(egd_rand[,1], egd_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(egd_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(egd_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(egd_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- Go Past Time--------------------------------------------')
egpt_rand <- extract(fit_egpt, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 1200), type="n",
     xlab = "Eye tracking value 1", ylab = "Eye tracking value 2", main = "Go Past Time") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(egpt_rand[,1], egpt_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(egpt_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(egpt_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(egpt_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- Total Duration--------------------------------------------')
etd_rand <- extract(fit_etd, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 1200), type="n",
     xlab = "Eye tracking value 1", ylab = "Eye tracking value 2", main = "Total Duration") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(etd_rand[,1], etd_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(etd_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(etd_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(etd_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- First Pass Regression --------------------------------------------')
ereg_rand <- extract(fit_ereg, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value 1", ylab = "Eye tracking value 2", main = "First Pass Regression") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(ereg_rand[,1], ereg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(ereg_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(ereg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(ereg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
```


# Bayesian -- correlation between MoTR and word level statistics
```{r}
print("Log Frequency")
stats_cor_df = provo_df %>% filter(metric == "gaze_duration") %>% spread(measure, value)
print(cor.test(stats_cor_df$motr_value, stats_cor_df$freq)$estimate)
print(cor.test(stats_cor_df$eyetr_value, stats_cor_df$freq)$estimate)

# View(stats_cor_df)
stats_cor_df %>% 
  gather(measure, value, c(7, 13)) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

mfreq_temp <- stats_cor_df[c("motr_value", "freq")] %>%
  data.matrix()
efreq_temp <- stats_cor_df[c("eyetr_value", "freq")] %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix gd_temp
plot(mfreq_temp, pch = 16, col = "blue",
     main = "MoTR RTs and Word Frequency")

# Plot the first data matrix gd_temp
plot(efreq_temp, pch = 16, col = "blue",
     main = "EyeTR RTs and Word Frequency")

```
# motr & frequency
```{r, eval=FALSE}
mfreq_data = list(x=mfreq_temp, N=nrow(mfreq_temp))

fit_mfreq = stan(
  file="stan_models/stats_correlation.stan", 
  data=mfreq_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  # verbose = FALSE
  )

# Save the model 
fit_mfreq@stanmodel@dso <- new("cxxdso")
saveRDS(fit_mfreq, file = paste0("./bayesian_models/bayesian_models_correlation/motr_freq_cor.rds"))
```

# eyetr & frequency
```{r, eval=FALSE}
efreq_data = list(x=efreq_temp, N=nrow(efreq_temp))

fit_efreq = stan(
  file="stan_models/stats_correlation.stan", 
  data=efreq_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  # verbose = FALSE
  )

# Save the model 
fit_efreq@stanmodel@dso <- new("cxxdso")
saveRDS(fit_efreq, file = paste0("./bayesian_models/bayesian_models_correlation/eyetr_freq_cor.rds"))
```

```{r}
print("Length")
stats_cor_df = provo_df %>% filter(metric == "gaze_duration") %>% spread(measure, value)
print(cor.test(stats_cor_df$motr_value, stats_cor_df$len)$estimate)
print(cor.test(stats_cor_df$eyetr_value, stats_cor_df$len)$estimate)

# View(stats_cor_df)
stats_cor_df %>% 
  gather(measure, value, c(9, 13)) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

mlen_temp <- stats_cor_df[c("motr_value", "len")] %>%
  data.matrix()
elen_temp <- stats_cor_df[c("eyetr_value", "len")] %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix gd_temp
plot(mlen_temp, pch = 16, col = "blue",
     main = "MoTR RTs and Word Length")

# Plot the first data matrix gd_temp
plot(elen_temp, pch = 16, col = "blue",
     main = "EyeTR RTs and Word Length")
```

# motr & length
```{r, eval=FALSE}
mlen_data = list(x=mlen_temp, N=nrow(mlen_temp))

fit_mlen = stan(
  file="stan_models/stats_correlation_len_normal.stan", 
  data=mlen_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  # verbose = FALSE
  )

# Save the model 
fit_mlen@stanmodel@dso <- new("cxxdso")
saveRDS(fit_mlen, file = paste0("./bayesian_models/bayesian_models_correlation/motr_len_cor.rds"))

```

# eyetr & length
```{r, eval=FALSE}
elen_data = list(x=elen_temp, N=nrow(elen_temp))

fit_elen = stan(
  file="stan_models/stats_correlation_len_normal.stan", 
  data=elen_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  # verbose = FALSE
  )

# Save the model 
fit_elen@stanmodel@dso <- new("cxxdso")
saveRDS(fit_elen, file = paste0("./bayesian_models/bayesian_models_correlation/eyetr_len_cor.rds"))
```


```{r}
print("Surprisal")
stats_cor_df = provo_df %>% filter(metric == "gaze_duration") %>% spread(measure, value)
print(cor.test(stats_cor_df$motr_value, stats_cor_df$surp)$estimate)
print(cor.test(stats_cor_df$eyetr_value, stats_cor_df$surp)$estimate)

# View(stats_cor_df)
stats_cor_df %>% 
  gather(measure, value, c(8, 13)) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

msurp_temp <- stats_cor_df[c("motr_value", "surp")] %>%
  data.matrix()
esurp_temp <- stats_cor_df[c("eyetr_value", "surp")] %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix gd_temp
plot(msurp_temp, pch = 16, col = "blue",
     main = "MoTR RTs and Surprisal")

# Plot the first data matrix gd_temp
plot(esurp_temp, pch = 16, col = "blue",
     main = "EyeTR RTs and Surprisal")
```
# motr & surprisal
```{r, eval=FALSE}
msurp_data = list(x=msurp_temp, N=nrow(msurp_temp))

fit_msurp = stan(
  file="stan_models/stats_correlation.stan", 
  data=msurp_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  # verbose = FALSE
  )

# Save the model 
fit_msurp@stanmodel@dso <- new("cxxdso")
saveRDS(fit_msurp, file = paste0("./bayesian_models/bayesian_models_correlation/motr_surp_cor.rds"))

```

# eyetr & surprisal
```{r, eval=FALSE}
esurp_data = list(x=esurp_temp, N=nrow(esurp_temp))

fit_esurp = stan(
  file="stan_models/stats_correlation.stan", 
  data=esurp_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  # verbose = FALSE
  )

# Save the model 
fit_esurp@stanmodel@dso <- new("cxxdso")
saveRDS(fit_esurp, file = paste0("./bayesian_models/bayesian_models_correlation/eyetr_surp_cor.rds"))

```


```{r}
fit_mfreq = readRDS("./bayesian_models/bayesian_models_correlation/motr_freq_cor.rds")
fit_efreq = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_freq_cor.rds")
fit_mlen = readRDS("./bayesian_models/bayesian_models_correlation/motr_len_cor.rds")
fit_elen = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_len_cor.rds")
fit_msurp = readRDS("./bayesian_models/bayesian_models_correlation/motr_surp_cor.rds")
fit_esurp = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_surp_cor.rds")

print('---------------------------- MoTR & Log Frequency --------------------------------------------')
print(fit_mfreq)
print('---------------------------- EyeTR & Log Frequency --------------------------------------------')
print(fit_efreq)
print('---------------------------- MoTR & Length --------------------------------------------')
print(fit_mlen)
print('---------------------------- EyeTR & Length --------------------------------------------')
print(fit_elen)
print('---------------------------- MoTR & Surprisal --------------------------------------------')
print(fit_msurp)
print('---------------------------- EyeTR & Surprisal --------------------------------------------')
print(fit_esurp)

```

```{r, eval=FALSE}
# MoTR & Log Freq
stan_trace(fit_mfreq)
stan_dens(fit_mfreq, separate_chains = TRUE)
stan_plot(fit_mfreq)

# EyeTR & Log Freq
stan_trace(fit_efreq)
stan_dens(fit_efreq, separate_chains = TRUE)
stan_plot(fit_efreq)

# MoTR & Len
stan_trace(fit_mlen)
stan_dens(fit_mlen, separate_chains = TRUE)
stan_plot(fit_mlen)

# EyeTR & Len
stan_trace(fit_elen)
stan_dens(fit_elen, separate_chains = TRUE)
stan_plot(fit_elen)

# MoTR & Surprisal
stan_trace(fit_msurp)
stan_dens(fit_msurp, separate_chains = TRUE)
stan_plot(fit_msurp)

# EyeTR & Surprisal
stan_trace(fit_esurp)
stan_dens(fit_esurp, separate_chains = TRUE)
stan_plot(fit_esurp)

```


```{r}
print('---------------------------- MoTR & Log Freq --------------------------------------------')
rho_mfreq = as.numeric(extract(fit_mfreq, "rho")[[1]])
mean = mean(rho_mfreq)
crI = quantile(rho_mfreq, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mfreq), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- EyeTR & Log Freq --------------------------------------------')
rho_efreq = as.numeric(extract(fit_efreq, "rho")[[1]])
mean = mean(rho_efreq)
crI = quantile(rho_efreq, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_efreq), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- MoTR & Length --------------------------------------------')
rho_mlen = as.numeric(extract(fit_mlen, "rho")[[1]])
mean = mean(rho_mlen)
crI = quantile(rho_mlen, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mlen), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- EyeTR & Length --------------------------------------------')
rho_elen = as.numeric(extract(fit_elen, "rho")[[1]])
mean = mean(rho_elen)
crI = quantile(rho_elen, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_elen), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- MoTR & Surprisal --------------------------------------------')
rho_msurp = as.numeric(extract(fit_msurp, "rho")[[1]])
mean = mean(rho_msurp)
crI = quantile(rho_msurp, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_msurp), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- EyeTR & Surprisal --------------------------------------------')
rho_esurp = as.numeric(extract(fit_esurp, "rho")[[1]])
mean = mean(rho_esurp)
crI = quantile(rho_esurp, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_esurp), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]")

```



```{r,eval=FALSE}
print('---------------------------- MoTR & Log Frequency--------------------------------------------')
mfreq_rand <- extract(fit_mfreq, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 12), type="n",
     xlab = "MoTR value", ylab = "Log Frequency", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(mfreq_rand[,1], mfreq_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(mfreq_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(mfreq_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mfreq_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- EyeTR & Log Frequency--------------------------------------------')
efreq_rand <- extract(fit_efreq, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 500), ylim=c(0, 12), type="n",
     xlab = "Eye tracking value", ylab = "Log Frequency", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(efreq_rand[,1], efreq_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(efreq_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(efreq_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(efreq_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- MoTR & Length --------------------------------------------')
mlen_rand <- extract(fit_mlen, "x_rand")[[1]]
# mlen_rand

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 20), type="n",
     xlab = "MoTR value", ylab = "Word Length", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(mlen_rand[,1], mlen_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(mlen_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(mlen_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mlen_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- EyeTR & Length --------------------------------------------')
elen_rand <- extract(fit_elen, "x_rand")[[1]]
# elen_rand

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 20), type="n",
     xlab = "EyeTR value", ylab = "Word Length", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(elen_rand[,1], elen_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(elen_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(elen_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(elen_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- MoTR & Surprisal --------------------------------------------')
msurp_rand <- extract(fit_msurp, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 20), type="n",
     xlab = "MoTR value", ylab = "Word Surprisal", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(msurp_rand [,1], msurp_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(msurp_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(msurp_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(msurp_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- EyeTR & Surprisal --------------------------------------------')
esurp_rand <- extract(fit_esurp, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 20), type="n",
     xlab = "EyeTR value", ylab = "Word Surprisal", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(esurp_rand [,1], esurp_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(esurp_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(esurp_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(esurp_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

```


```{r}
print("EyeTR vs. EyeTR Fisrt Pass Regression Prob. < 0.3 ")

ereg_df = provo_eyetr_grouped_df %>% filter(metric == "FPReg") %>% distinct() %>% #group_by(text_id, metric, measure) %>%
  # summarize(value = mean(value)) %>%
  filter(!(row_number() %in% c(443, 444, 445, 446))) %>%
    spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value_1 =  pmax(value_1, 1e-5),
         eyetr_value_2 = pmax(value_2, 1e-5))

ereg_df_low = provo_eyetr_grouped_df %>% filter(metric == "FPReg") %>% distinct() %>% #group_by(text_id, metric, measure) %>%
  # summarize(value = mean(value)) %>%
  filter(!(row_number() %in% c(443, 444, 445, 446))) %>%
    spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value_1 =  pmax(value_1, 1e-5),
         eyetr_value_2 = pmax(value_2, 1e-5)) %>%
  filter(eyetr_value_1 < 0.3)
# View(ereg_df_low)

ereg_df_high = provo_eyetr_grouped_df %>% filter(metric == "FPReg") %>% distinct() %>% #group_by(text_id, metric, measure) %>%
  # summarize(value = mean(value)) %>%
  filter(!(row_number() %in% c(443, 444, 445, 446))) %>%
    spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value_1 =  pmax(value_1, 1e-5),
         eyetr_value_2 = pmax(value_2, 1e-5)) %>%
  filter(eyetr_value_1 >= 0.3)
# View(ereg_df_high) 

print(cor.test(ereg_df$eyetr_value_1, ereg_df$eyetr_value_2)$estimate)
print(cor.test(ereg_df$eyetr_value_1, ereg_df$eyetr_value_2)$p.value)
print(cor.test(ereg_df_low$eyetr_value_1, ereg_df_low$eyetr_value_2)$estimate)
print(cor.test(ereg_df_low$eyetr_value_1, ereg_df_low$eyetr_value_2)$p.value)
print(cor.test(ereg_df_high$eyetr_value_1, ereg_df_high$eyetr_value_2)$estimate)
print(cor.test(ereg_df_high$eyetr_value_1, ereg_df_high$eyetr_value_2)$p.value)

# View(egd_df)

ereg_df %>% 
  gather(measure, value, 5:6) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

ereg_temp <- ereg_df[c("eyetr_value_1", "eyetr_value_2")] %>%
  drop_na() %>%
  data.matrix()
ereg_temp_low <- ereg_df_low[c("eyetr_value_1", "eyetr_value_2")] %>%
  drop_na() %>%
  data.matrix()
ereg_temp_high <- ereg_df_high[c("eyetr_value_1", "eyetr_value_2")] %>%
  drop_na() %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 3))
# Plot the first data matrix gd_temp
plot(ereg_temp, pch = 16, col = "blue",
     main = "FPReg all data Not Log-Transformed")
plot(ereg_temp_low, pch = 16, col = "blue",
     main = "FPReg < 0.3 Not Log-Transformed")
plot(ereg_temp_high, pch = 16, col = "blue",
     main = "FPReg > 0.3 Not Log-Transformed")
```


```{r, eval=FALSE}
# -------fit model eyetr vs. eyetr FPReg <0.3 & >=0.3 ----------
reg_data = list(x=ereg_temp, N=nrow(ereg_temp))
fit_reg = stan(
  # file="stan_models/bivariate_beta_correlation_reg.stan", 
  file = "stan_models/bivariate_normal_reg.stan",
  data=reg_data, 
  iter=4000, 
  chains=4, 
  cores=4,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
fit_reg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_reg, file = paste0("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_FPReg_cor_all_data.rds"))
```



# exploratory: divide eye tracking regression data into two parts.
```{r}
# fit_ereg_all = readRDS("./eyetr_eyetr_FPReg_cor_all_data.rds")
fit_ereg_all = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_FPReg_cor.rds")
fit_ereg_low = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_FPReg_cor_00-03.rds")
fit_ereg_high = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_FPReg_cor_03-1.rds")

print('---------------------------- First Pass Regression Prob. all data --------------------------------------------')
print(fit_ereg_all)
print('---------------------------- First Pass Regression Prob.< 0.3--------------------------------------------')
print(fit_ereg_low)
print('---------------------------- First Pass Regression Prob.>= 0.3--------------------------------------------')
print(fit_ereg_high)

```


```{r}
# # FPReg all data
stan_trace(fit_ereg_all)
stan_dens(fit_ereg_all, separate_chains = TRUE)
stan_plot(fit_ereg_all)

# # FPReg < 0.3
stan_trace(fit_ereg_low)
stan_dens(fit_ereg_low, separate_chains = TRUE)
stan_plot(fit_ereg_low)

# FPReg >= 0.3
stan_trace(fit_ereg_high)
stan_dens(fit_ereg_high, separate_chains = TRUE)
stan_plot(fit_ereg_high)
```

```{r}
print('---------------------------- First Pass Regression all data--------------------------------------------')
rho_ereg_all = as.numeric(extract(fit_ereg_all, "rho")[[1]])
mean = mean(rho_ereg_all)
crI = quantile(rho_ereg_all, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_ereg_all), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")


print('---------------------------- First Pass Regression < 0.3--------------------------------------------')
rho_ereg_low = as.numeric(extract(fit_ereg_low, "rho")[[1]])
mean = mean(rho_ereg_low)
crI = quantile(rho_ereg_low, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_ereg_low), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")


print('---------------------------- First Pass Regression >= 0.3--------------------------------------------')
rho_ereg_high = as.numeric(extract(fit_ereg_high, "rho")[[1]])
mean = mean(rho_ereg_high)
crI = quantile(rho_ereg_high, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_ereg_high), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
```

```{r}
print('---------------------------- First Pass Regression all data --------------------------------------------')
eallreg_rand <- extract(fit_ereg_all, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(eallreg_rand[,1], eallreg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(ereg_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(eallreg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(eallreg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- First Pass Regression < 0.3 --------------------------------------------')
elowreg_rand <- extract(fit_ereg_low, "x_rand")[[1]]
# print(elowreg_rand)
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(elowreg_rand[,1], elowreg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(ereg_temp_low, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(elowreg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(elowreg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- First Pass Regression >= 0.3 --------------------------------------------')
ehighreg_rand_samples <- extract(fit_ereg_high, "x_rand")[[1]]
# print(mhighreg_rand_samples)
selected_indices <- sample(1:nrow(ehighreg_rand_samples), 900)
ehighreg_rand <- ehighreg_rand_samples[selected_indices, ]
# mhighreg_rand <- extract(fit_mreg_high, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(ehighreg_rand[,1], ehighreg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(ereg_temp_high, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(ehighreg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(ehighreg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

```
